# pip install tensorflow
# conda install pandas
# conda install seaborn
# conda install scikit-learn
# conda install plotly
# pip install --upgrade nbformat
# conda install Jinja2
| Label | Description |
|---|---|
| 0 | T-shirt/Top |
| 1 | Trouser |
| 2 | Pullover |
| 3 | Dress |
| 4 | Coat |
| 5 | Sandal |
| 6 | Shirt |
| 7 | Sneaker |
| 8 | Bag |
| 9 | Ankle Boot |
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
# (X_train, y_train), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
data = tf.keras.datasets.fashion_mnist.load_data()
# (X_train, y_train), (X_test, y_test) = data
# split the data into train and test while keeping the class distribution balanced (stratified)
# split data into train, test and validation
X_train, X_test, y_train, y_test = train_test_split(data[0][0], data[0][1], test_size=0.2, random_state=42, stratify=data[0][1])
# 80% train, 20% test
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42, stratify=y_train)  # 0.15 x 0.8 = 0.12 = 12% of original data
# we now have 68% train, 20% test, 12% validation
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)
X_val = X_val.reshape(-1, 784)
(40800, 28, 28) (12000, 28, 28)
(40800,) (12000,)
X_train_df = pd.DataFrame(X_train.reshape(-1, 784))
y_train_df = pd.DataFrame(y_train)
X_train_df
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 2 | 0 | 0 | 178 | 183 | 110 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 190 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 36 | 109 | 112 | 6 | 165 | 0 | 92 | 148 | 107 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 40795 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 24 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 40796 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 40797 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | ... | 84 | 17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 40798 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 46 | ... | 151 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 40799 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
40800 rows × 784 columns
Dividing each pixel by 255 normalizes the pixel values to the range 0 to 1. We normalize the pixels to speed up the learning process: a neural network processes inputs using small weight values, and large input values can disrupt or slow down learning, so it is good practice to normalize the pixels.
print(X_train.min(), X_train.max())
# the dataset provided by tensorflow holds raw pixel values from 0 to 255; dividing by 255 performs the so-called pixel normalization
X_train = X_train / 255.0
X_test = X_test / 255.0
X_val = X_val / 255.0
0 255
The number of data points for each class is exactly the same, so there will be no bias toward any class.
types = y_train_df[0].unique()
types.sort()
print(types)
print(y_train_df[0].value_counts())
# y_train is just an array of numbers from 0 to 9 that represent the class of each image
[0 1 2 3 4 5 6 7 8 9]
4    4080
7    4080
1    4080
5    4080
9    4080
8    4080
0    4080
2    4080
6    4080
3    4080
Name: 0, dtype: int64
class_names = ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
print(class_names)
['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
array_of_each_class_position = [1,21,5,20,24,9,18,6,23,0]
# plot all images in array_of_each_class_position
plt.figure(figsize=(20,20))
for i in range(10):
    plt.subplot(5, 5, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.title(class_names[i])
    plt.imshow(data[0][0][array_of_each_class_position[i]], cmap=plt.cm.binary)
    plt.xlabel(data[0][1][array_of_each_class_position[i]])
plt.show()
X_train: uint8 NumPy array of grayscale image data with shape (60000, 28, 28), containing the training data.
y_train: uint8 NumPy array of labels (integers in range 0-9) with shape (60000,) for the training data.
X_test: uint8 NumPy array of grayscale image data with shape (10000, 28, 28), containing the test data.
y_test: uint8 NumPy array of labels (integers in range 0-9) with shape (10000,) for the test data.

# Baseline simple neural network
from tensorflow.keras.layers import Flatten
# model = keras.Sequential(
#     [
#         keras.Input(shape=(28, 28)),
#         layers.Flatten(),
#         layers.Dense(128, activation="relu"),    # 128 neurons
#         layers.Dense(10, activation='sigmoid')   # 10 neurons, one per type of fashion clothing
#     ]
# )
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
# fix random seed for reproducibility
seed = 1
np.random.seed(seed)
model = Sequential()
model.add(Dense(128, input_shape=(784,), activation='relu'))
model.add(Dense(10, activation='sigmoid'))  # note: softmax is the conventional output here; sigmoid still trains, but the outputs do not form a proper probability distribution
model.summary()
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.fit(X_train, y_train, epochs=5, verbose=2)
results=model.evaluate(X_test, y_test,verbose=2)
print("test loss,", results[0], "test acc:", results[1])
# Train your model and save its history
def plot_loss(loss, test_loss):
    plt.figure()
    plt.plot(loss)
    plt.plot(test_loss)
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper right')
    plt.show()

def plot_accuracy(acc, test_acc):
    plt.figure()
    plt.plot(acc)
    plt.plot(test_acc)
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper right')
    plt.show()
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
h_callback = model.fit(X_train, y_train, epochs=10,
                       validation_data=(X_val, y_val))
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
# Plot train vs test accuracy during training
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 128) 100480
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1275/1275 - 3s - loss: 0.5378 - accuracy: 0.8108 - 3s/epoch - 3ms/step
Epoch 2/5
1275/1275 - 2s - loss: 0.4001 - accuracy: 0.8559 - 2s/epoch - 2ms/step
Epoch 3/5
1275/1275 - 2s - loss: 0.3594 - accuracy: 0.8683 - 2s/epoch - 2ms/step
Epoch 4/5
1275/1275 - 2s - loss: 0.3324 - accuracy: 0.8784 - 2s/epoch - 2ms/step
Epoch 5/5
1275/1275 - 2s - loss: 0.3112 - accuracy: 0.8841 - 2s/epoch - 2ms/step
375/375 - 1s - loss: 0.3396 - accuracy: 0.8788 - 675ms/epoch - 2ms/step
test loss, 0.33961784839630127 test acc: 0.8787500262260437
Epoch 1/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.2968 - accuracy: 0.8916 - val_loss: 0.3539 - val_accuracy: 0.8739
Epoch 2/10
1275/1275 [==============================] - 3s 3ms/step - loss: 0.2809 - accuracy: 0.8943 - val_loss: 0.3303 - val_accuracy: 0.8792
Epoch 3/10
1275/1275 [==============================] - 3s 3ms/step - loss: 0.2706 - accuracy: 0.8988 - val_loss: 0.3356 - val_accuracy: 0.8819
Epoch 4/10
1275/1275 [==============================] - 3s 3ms/step - loss: 0.2569 - accuracy: 0.9041 - val_loss: 0.3352 - val_accuracy: 0.8783
Epoch 5/10
1275/1275 [==============================] - 3s 3ms/step - loss: 0.2473 - accuracy: 0.9091 - val_loss: 0.3422 - val_accuracy: 0.8786
Epoch 6/10
1275/1275 [==============================] - 3s 3ms/step - loss: 0.2372 - accuracy: 0.9109 - val_loss: 0.3289 - val_accuracy: 0.8853
Epoch 7/10
1275/1275 [==============================] - 3s 3ms/step - loss: 0.2282 - accuracy: 0.9152 - val_loss: 0.3323 - val_accuracy: 0.8865
Epoch 8/10
1275/1275 [==============================] - 3s 3ms/step - loss: 0.2191 - accuracy: 0.9179 - val_loss: 0.3387 - val_accuracy: 0.8874
Epoch 9/10
1275/1275 [==============================] - 3s 3ms/step - loss: 0.2145 - accuracy: 0.9194 - val_loss: 0.3142 - val_accuracy: 0.8913
Epoch 10/10
1275/1275 [==============================] - 3s 3ms/step - loss: 0.2067 - accuracy: 0.9222 - val_loss: 0.3224 - val_accuracy: 0.8885
Baseline model with at least 2 hidden layers, which performs slightly better than the 1-layer version. However, the 2-layer model is clearly overfitted: the test learning curve diverges away from the training learning curve, and it only performs a little better than the model with 1 layer.
# Baseline simple neural network with a second hidden layer
# (X_train and X_test are already flattened to 784 features)
model = Sequential()
model.add(Dense(128, input_shape=(784,), activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='sigmoid'))
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()
h_callback = model.fit(X_train, y_train, epochs=5,
                       validation_data=(X_val, y_val))
results=model.evaluate(X_test, y_test, batch_size=32, verbose=2)
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
# Plot train vs test accuracy during training
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_2 (Dense) (None, 128) 100480
dense_3 (Dense) (None, 64) 8256
dense_4 (Dense) (None, 10) 650
=================================================================
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1275/1275 [==============================] - 4s 3ms/step - loss: 0.5344 - accuracy: 0.8104 - val_loss: 0.4093 - val_accuracy: 0.8582
Epoch 2/5
1275/1275 [==============================] - 4s 3ms/step - loss: 0.3914 - accuracy: 0.8562 - val_loss: 0.3620 - val_accuracy: 0.8731
Epoch 3/5
1275/1275 [==============================] - 4s 3ms/step - loss: 0.3497 - accuracy: 0.8712 - val_loss: 0.3486 - val_accuracy: 0.8751
Epoch 4/5
1275/1275 [==============================] - 4s 3ms/step - loss: 0.3241 - accuracy: 0.8800 - val_loss: 0.3468 - val_accuracy: 0.8724
Epoch 5/5
1275/1275 [==============================] - 4s 3ms/step - loss: 0.3067 - accuracy: 0.8859 - val_loss: 0.3375 - val_accuracy: 0.8751
375/375 - 1s - loss: 0.3260 - accuracy: 0.8864 - 569ms/epoch - 2ms/step
model = Sequential()
model.add(Dense(128, input_shape=(784,), activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(10, activation='sigmoid'))
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()
h_callback = model.fit(X_train, y_train, epochs=10,
                       validation_data=(X_val, y_val))
results=model.evaluate(X_test, y_test, batch_size=32, verbose=2)
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
# Plot train vs test accuracy during training
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_5 (Dense) (None, 128) 100480
dense_6 (Dense) (None, 64) 8256
dense_7 (Dense) (None, 32) 2080
dense_8 (Dense) (None, 10) 330
=================================================================
Total params: 111,146
Trainable params: 111,146
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
1275/1275 [==============================] - 5s 3ms/step - loss: 0.5468 - accuracy: 0.8028 - val_loss: 0.4344 - val_accuracy: 0.8407
Epoch 2/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.3943 - accuracy: 0.8542 - val_loss: 0.3981 - val_accuracy: 0.8511
Epoch 3/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.3559 - accuracy: 0.8681 - val_loss: 0.3936 - val_accuracy: 0.8567
Epoch 4/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.3287 - accuracy: 0.8774 - val_loss: 0.3621 - val_accuracy: 0.8711
Epoch 5/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.3099 - accuracy: 0.8847 - val_loss: 0.3555 - val_accuracy: 0.8760
Epoch 6/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.2953 - accuracy: 0.8883 - val_loss: 0.3616 - val_accuracy: 0.8735
Epoch 7/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.2832 - accuracy: 0.8939 - val_loss: 0.3378 - val_accuracy: 0.8735
Epoch 8/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.2709 - accuracy: 0.8966 - val_loss: 0.3374 - val_accuracy: 0.8774
Epoch 9/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.2614 - accuracy: 0.9021 - val_loss: 0.3342 - val_accuracy: 0.8844
Epoch 10/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.2519 - accuracy: 0.9053 - val_loss: 0.3548 - val_accuracy: 0.8742
375/375 - 1s - loss: 0.3419 - accuracy: 0.8772 - 617ms/epoch - 2ms/step
pred=model.predict(X_test)
pred=np.argmax(pred,axis=1)
confusion_mat = confusion_matrix(y_test, pred)
plt.figure(figsize=(10, 10))
sns.heatmap(confusion_mat, annot=True, fmt='d')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()
375/375 [==============================] - 1s 1ms/step

Shah, S. (2022) Convolutional Neural Network: An Overview. Published on January 27, 2022, last modified on March 15, 2022. Available at: https://towardsdatascience.com/convolutional-neural-network-feature-map-and-filter-visualization-f75012a5a49c [Accessed: 4 November 2022]
Convolve ~ combine (one function or series) with another by forming their convolution, which amounts to summing the element-wise product of two matrices (Shah, S. 2022)

Striding convolutions: repeating what happens above but with strides, the filter moving across the image to produce a matrix of convolved features. The main purpose of doing this is to find the features of a class, which are then used to classify the images.
- the final (convolved) matrix produced is smaller, retaining only the main features of the image
Convolving helps the neural network extract features, so it performs better than an MLP.
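To make the operation concrete, here is a minimal NumPy sketch (the 5x5 input and 3x3 vertical-edge filter are made-up values for illustration, not taken from the model): it slides the filter across the image, summing the element-wise products at each position, and a larger stride shrinks the output map.
import numpy as np

def convolve2d(image, kernel, stride=1):
    # "valid" convolution: sum of element-wise products at each filter position
    kh, kw = kernel.shape
    out_h = (image.shape[0] - kh) // stride + 1
    out_w = (image.shape[1] - kw) // stride + 1
    out = np.zeros((out_h, out_w))
    for i in range(out_h):
        for j in range(out_w):
            patch = image[i * stride:i * stride + kh, j * stride:j * stride + kw]
            out[i, j] = np.sum(patch * kernel)
    return out

image = np.array([[0, 0, 1, 1, 0],
                  [0, 0, 1, 1, 0],
                  [0, 0, 1, 1, 0],
                  [0, 0, 1, 1, 0],
                  [0, 0, 1, 1, 0]], dtype=float)
vertical_edge = np.array([[1, 0, -1],
                          [1, 0, -1],
                          [1, 0, -1]], dtype=float)
print(convolve2d(image, vertical_edge))             # 3x3 convolved feature map
print(convolve2d(image, vertical_edge, stride=2))   # stride 2 gives a smaller 2x2 map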
# Functions for plotting
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
def create_trace(x, y, ylabel, color):
    trace = go.Scatter(
        x=x, y=y,
        name=ylabel,
        marker=dict(color=color),
        mode="markers+lines",
        text=x
    )
    return trace

def plot_accuracy_and_loss(train_model):
    hist = train_model.history
    acc = hist['accuracy']
    val_acc = hist['val_accuracy']
    loss = hist['loss']
    val_loss = hist['val_loss']
    epochs = list(range(1, len(acc) + 1))
    trace_ta = create_trace(epochs, acc, "Training accuracy", "Green")
    trace_va = create_trace(epochs, val_acc, "Validation accuracy", "Red")
    trace_tl = create_trace(epochs, loss, "Training loss", "Blue")
    trace_vl = create_trace(epochs, val_loss, "Validation loss", "Magenta")
    fig = tools.make_subplots(rows=1, cols=2, subplot_titles=('Training and validation accuracy',
                                                              'Training and validation loss'))
    fig.append_trace(trace_ta, 1, 1)
    fig.append_trace(trace_va, 1, 1)
    fig.append_trace(trace_tl, 1, 2)
    fig.append_trace(trace_vl, 1, 2)
    fig['layout']['xaxis'].update(title='Epoch')
    fig['layout']['xaxis2'].update(title='Epoch')
    fig['layout']['yaxis'].update(title='Accuracy', range=[0, 1])
    fig['layout']['yaxis2'].update(title='Loss', range=[0, 1])
    iplot(fig, filename=f'accuracy-loss_{train_model}')
from tensorflow.keras.layers import Conv2D,Flatten
from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.layers import Activation
from keras.layers import LeakyReLU
epochs = 15
num_classes = 10
np.random.seed(1)  # fix random seed for reproducibility
print('before',X_train.shape,X_test.shape)
print('before',y_train.shape,y_test.shape)
X_train = X_train.reshape((X_train.shape[0], 28, 28, 1))
X_test = X_test.reshape((X_test.shape[0], 28, 28, 1))
X_val = X_val.reshape((X_val.shape[0], 28, 28, 1))
y_test_label = y_test
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
y_val = to_categorical(y_val)
print('after',X_train.shape,X_test.shape)
print('after',y_train.shape,y_test.shape)
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='linear'))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
h_callback = fashion_model.fit(X_train, y_train, epochs=10,
                               validation_data=(X_val, y_val))
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
before (40800, 784) (12000, 784)
before (40800,) (12000,)
after (40800, 28, 28, 1) (12000, 28, 28, 1)
after (40800, 10) (12000, 10)
Model: "sequential_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
conv2d_1 (Conv2D) (None, 24, 24, 64) 18496
flatten (Flatten) (None, 36864) 0
dense_9 (Dense) (None, 128) 4718720
dense_10 (Dense) (None, 10) 1290
=================================================================
Total params: 4,738,826
Trainable params: 4,738,826
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
1275/1275 [==============================] - 10s 5ms/step - loss: 0.5647 - accuracy: 0.8101 - val_loss: 0.5161 - val_accuracy: 0.8111
Epoch 2/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.4688 - accuracy: 0.8356 - val_loss: 0.5089 - val_accuracy: 0.8201
Epoch 3/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.4492 - accuracy: 0.8429 - val_loss: 0.4848 - val_accuracy: 0.8296
Epoch 4/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.4422 - accuracy: 0.8451 - val_loss: 0.5047 - val_accuracy: 0.8329
Epoch 5/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.4332 - accuracy: 0.8472 - val_loss: 0.4804 - val_accuracy: 0.8358
Epoch 6/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.4278 - accuracy: 0.8485 - val_loss: 0.4922 - val_accuracy: 0.8360
Epoch 7/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.4213 - accuracy: 0.8517 - val_loss: 0.4916 - val_accuracy: 0.8310
Epoch 8/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.4172 - accuracy: 0.8522 - val_loss: 0.4950 - val_accuracy: 0.8351
Epoch 9/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.4108 - accuracy: 0.8561 - val_loss: 0.4877 - val_accuracy: 0.8332
Epoch 10/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.4056 - accuracy: 0.8564 - val_loss: 0.4857 - val_accuracy: 0.8371
c:\Users\Admin\.conda\envs\gpu_env\lib\site-packages\plotly\tools.py:460: DeprecationWarning: plotly.tools.make_subplots is deprecated, please use plotly.subplots.make_subplots instead
#Understand the filters in the model
#Let us pick the first hidden layer as the layer of interest.
layer = fashion_model.layers #Conv layers at 0,
filters, biases = fashion_model.layers[0].get_weights()
print(layer[0].name, filters.shape)
# plot filters
fig1 = plt.figure(figsize=(8, 12))
columns = 8
rows = 8
n_filters = 32  # the number of filters in our first layer
for i in range(1, n_filters + 1):
    f = filters[:, :, :, i - 1]
    fig1 = plt.subplot(rows, columns, i)
    fig1.set_xticks([])  # turn off axis ticks
    fig1.set_yticks([])
    plt.imshow(f[:, :, 0], cmap='gray')  # show only the filters from the 0th channel
plt.show()
conv2d (3, 3, 1, 32)
pred=fashion_model.predict(X_test)
pred=np.argmax(pred,axis=1)
confusion_mat = confusion_matrix(y_test_label, pred)
plt.figure(figsize=(10, 10))
sns.heatmap(confusion_mat, annot=True, fmt='d')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()
375/375 [==============================] - 1s 1ms/step
Machines understand binary, so it makes more sense to one-hot encode the labels, changing each label into an array of 0s and 1s instead of a single number in the range 0 to 9.
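A quick sketch of what the one-hot encoding used in this notebook (keras' to_categorical) does to a few example labels:
from keras.utils import to_categorical
import numpy as np

labels = np.array([3, 0, 9])               # e.g. Dress, T-shirt, Ankle boot
print(to_categorical(labels, num_classes=10))
# [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
#  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
#  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]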
It's amazing how the filters extract key information on the various classes, showing the line edges and features of the images they are looking out for.
# baseline cnn model for fashion mnist
from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping
# evaluate a model using k-fold cross-validation
def evaluate_model(model, dataX, dataY, n_folds=5, valX=X_val, valy=y_val):
    scores, histories = list(), list()
    # prepare cross validation
    kfold = KFold(n_folds, shuffle=True, random_state=1)
    # enumerate splits
    for train_ix, test_ix in kfold.split(dataX):
        # select rows for train and test
        trainX, trainY, testX, testY = dataX[train_ix], dataY[train_ix], dataX[test_ix], dataY[test_ix]
        h_callback = EarlyStopping(monitor='val_loss', patience=5)
        # fit model
        history = model.fit(trainX, trainY, epochs=50, validation_data=(testX, testY), verbose=0, callbacks=[h_callback])
        # evaluate model on the held-out validation set
        _, acc = model.evaluate(valX, valy, verbose=0)
        print('> %.3f' % (acc * 100.0))
        # append scores
        scores.append(acc)
        histories.append(history)
    return scores, histories
# plot diagnostic learning curves
def summarize_diagnostics(histories):
    for i in range(len(histories)):
        # plot loss
        pyplot.subplot(211)
        pyplot.title('Cross Entropy Loss')
        pyplot.plot(histories[i].history['loss'], color='blue', label='train')
        pyplot.plot(histories[i].history['val_loss'], color='orange', label='test')
        # plot accuracy
        pyplot.subplot(212)
        pyplot.title('Classification Accuracy')
        pyplot.plot(histories[i].history['accuracy'], color='blue', label='train')
        pyplot.plot(histories[i].history['val_accuracy'], color='orange', label='test')
    pyplot.legend()
    pyplot.show()

# summarize model performance
def summarize_performance(scores):
    # print summary
    print('Accuracy: mean=%.3f std=%.3f, n=%d' % (mean(scores) * 100, std(scores) * 100, len(scores)))
    # box and whisker plots of results
    pyplot.boxplot(scores)
    pyplot.show()

# run the test harness for evaluating a model
def run_test_harness(model, X_train, y_train):
    # evaluate model
    scores, histories = evaluate_model(model, X_train, y_train)
    # learning curves
    summarize_diagnostics(histories)
    # summarize estimated performance
    summarize_performance(scores)

Softmax produces a probability score for each of the 10 classes. If the model is not confident about the choice it makes, that uncertainty is summed up and added to the log loss: the more uncertain the model is, the higher the log loss.
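A small NumPy sketch of this idea (the logits are made-up numbers, and only 3 classes are shown for brevity): a confident correct prediction contributes almost nothing to the log loss, while an uncertain one contributes a lot.
import numpy as np

def softmax(z):
    e = np.exp(z - z.max())       # subtract the max for numerical stability
    return e / e.sum()

def log_loss(probs, true_class):
    return -np.log(probs[true_class])

confident = softmax(np.array([8.0, 1.0, 0.5]))
uncertain = softmax(np.array([1.2, 1.0, 0.9]))
print(log_loss(confident, 0))   # ~0.001: confident and correct, tiny loss
print(log_loss(uncertain, 0))   # ~0.94: correct but unsure, much larger loss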

My hypothesis is that max pooling will work better for this dataset, as it completely wipes out the noise in a region by keeping only the brightest value, as the sketch below illustrates.
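A tiny made-up example of the difference between the two pooling operations on one 2x2 window:
import numpy as np

window = np.array([[0.9, 0.1],
                   [0.0, 0.2]])   # one strong feature activation plus low-level noise
print(window.max())    # 0.9 -> max pooling keeps only the brightest value
print(window.mean())   # 0.3 -> average pooling dilutes the feature with the noise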
def model_max_pooling():
    # define model
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='linear', input_shape=(28, 28, 1)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='linear'))
    model.add(Dense(num_classes, activation='softmax'))
    # compile model
    model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
    return model
run_test_harness(model_max_pooling(),X_train, y_train)
> 87.792
> 88.847
> 88.639
> 87.306
> 88.000
Accuracy: mean=88.117 std=0.563, n=5
from keras.layers import AveragePooling2D
def model_average_pooling():
    # define model
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='linear', input_shape=(28, 28, 1)))
    model.add(AveragePooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='linear'))
    model.add(Dense(num_classes, activation='softmax'))
    # compile model
    model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
    return model
%%time
fashion_model_batch = Sequential()
fashion_model_batch.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model_batch.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model_batch.add(Flatten())
fashion_model_batch.add(Dense(128, activation='linear'))
fashion_model_batch.add(Dense(num_classes, activation='softmax'))
fashion_model_batch.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model_batch.summary()
h_callback = fashion_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))  # note: this continues training the earlier fashion_model, not fashion_model_batch
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
Model: "sequential_5"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_3 (Conv2D) (None, 26, 26, 32) 320
conv2d_4 (Conv2D) (None, 24, 24, 64) 18496
flatten_2 (Flatten) (None, 36864) 0
dense_13 (Dense) (None, 128) 4718720
dense_14 (Dense) (None, 10) 1290
=================================================================
Total params: 4,738,826
Trainable params: 4,738,826
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.4052 - accuracy: 0.8559 - val_loss: 0.4465 - val_accuracy: 0.8475
Epoch 2/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.4012 - accuracy: 0.8583 - val_loss: 0.4556 - val_accuracy: 0.8443
Epoch 3/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.3977 - accuracy: 0.8584 - val_loss: 0.4494 - val_accuracy: 0.8447
Epoch 4/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.3944 - accuracy: 0.8591 - val_loss: 0.4291 - val_accuracy: 0.8530
Epoch 5/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.3939 - accuracy: 0.8614 - val_loss: 0.4561 - val_accuracy: 0.8407
Epoch 6/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.3898 - accuracy: 0.8604 - val_loss: 0.4715 - val_accuracy: 0.8486
Epoch 7/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.3921 - accuracy: 0.8609 - val_loss: 0.4399 - val_accuracy: 0.8529
Epoch 8/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.3883 - accuracy: 0.8627 - val_loss: 0.4414 - val_accuracy: 0.8528
Epoch 9/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.3884 - accuracy: 0.8599 - val_loss: 0.4494 - val_accuracy: 0.8547
Epoch 10/10
1275/1275 [==============================] - 6s 5ms/step - loss: 0.3842 - accuracy: 0.8625 - val_loss: 0.4551 - val_accuracy: 0.8456
c:\Users\Admin\.conda\envs\gpu_env\lib\site-packages\plotly\tools.py:460: DeprecationWarning: plotly.tools.make_subplots is deprecated, please use plotly.subplots.make_subplots instead
CPU times: total: 1min 24s Wall time: 1min 4s
%%time
fashion_model_batch = Sequential()
fashion_model_batch.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model_batch.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model_batch.add(Flatten())
fashion_model_batch.add(Dense(128, activation='linear'))
fashion_model_batch.add(Dense(num_classes, activation='softmax'))
fashion_model_batch.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model_batch.summary()
h_callback = fashion_model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test))  # note: this continues training the earlier fashion_model, not fashion_model_batch
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
Model: "sequential_6"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_5 (Conv2D) (None, 26, 26, 32) 320
conv2d_6 (Conv2D) (None, 24, 24, 64) 18496
flatten_3 (Flatten) (None, 36864) 0
dense_15 (Dense) (None, 128) 4718720
dense_16 (Dense) (None, 10) 1290
=================================================================
Total params: 4,738,826
Trainable params: 4,738,826
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
638/638 [==============================] - 4s 6ms/step - loss: 0.3582 - accuracy: 0.8730 - val_loss: 0.4490 - val_accuracy: 0.8528
Epoch 2/10
638/638 [==============================] - 4s 6ms/step - loss: 0.3599 - accuracy: 0.8724 - val_loss: 0.4478 - val_accuracy: 0.8518
Epoch 3/10
638/638 [==============================] - 4s 6ms/step - loss: 0.3623 - accuracy: 0.8711 - val_loss: 0.4486 - val_accuracy: 0.8535
Epoch 4/10
638/638 [==============================] - 4s 6ms/step - loss: 0.3635 - accuracy: 0.8700 - val_loss: 0.4500 - val_accuracy: 0.8495
Epoch 5/10
638/638 [==============================] - 4s 6ms/step - loss: 0.3639 - accuracy: 0.8686 - val_loss: 0.4563 - val_accuracy: 0.8469
Epoch 6/10
638/638 [==============================] - 4s 6ms/step - loss: 0.3629 - accuracy: 0.8695 - val_loss: 0.4571 - val_accuracy: 0.8471
Epoch 7/10
638/638 [==============================] - 4s 6ms/step - loss: 0.3624 - accuracy: 0.8700 - val_loss: 0.4474 - val_accuracy: 0.8525
Epoch 8/10
638/638 [==============================] - 4s 6ms/step - loss: 0.3622 - accuracy: 0.8704 - val_loss: 0.4463 - val_accuracy: 0.8522
Epoch 9/10
638/638 [==============================] - 4s 6ms/step - loss: 0.3604 - accuracy: 0.8712 - val_loss: 0.4608 - val_accuracy: 0.8462
Epoch 10/10
638/638 [==============================] - 4s 6ms/step - loss: 0.3609 - accuracy: 0.8716 - val_loss: 0.4530 - val_accuracy: 0.8512
CPU times: total: 51.9 s Wall time: 37.8 s
%%time
fashion_model_batch = Sequential()
fashion_model_batch.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model_batch.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model_batch.add(Flatten())
fashion_model_batch.add(Dense(128, activation='linear'))
fashion_model_batch.add(Dense(num_classes, activation='softmax'))
fashion_model_batch.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model_batch.summary()
h_callback = fashion_model.fit(X_train, y_train, epochs=10, batch_size=128, validation_data=(X_test, y_test))  # note: this continues training the earlier fashion_model, not fashion_model_batch
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
Model: "sequential_7"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_7 (Conv2D) (None, 26, 26, 32) 320
conv2d_8 (Conv2D) (None, 24, 24, 64) 18496
flatten_4 (Flatten) (None, 36864) 0
dense_17 (Dense) (None, 128) 4718720
dense_18 (Dense) (None, 10) 1290
=================================================================
Total params: 4,738,826
Trainable params: 4,738,826
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
319/319 [==============================] - 2s 8ms/step - loss: 0.3421 - accuracy: 0.8785 - val_loss: 0.4565 - val_accuracy: 0.8542
Epoch 2/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3430 - accuracy: 0.8773 - val_loss: 0.4559 - val_accuracy: 0.8516
Epoch 3/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3416 - accuracy: 0.8778 - val_loss: 0.4622 - val_accuracy: 0.8497
Epoch 4/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3439 - accuracy: 0.8767 - val_loss: 0.4557 - val_accuracy: 0.8518
Epoch 5/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3432 - accuracy: 0.8772 - val_loss: 0.4628 - val_accuracy: 0.8506
Epoch 6/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3461 - accuracy: 0.8753 - val_loss: 0.4648 - val_accuracy: 0.8472
Epoch 7/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3462 - accuracy: 0.8757 - val_loss: 0.4608 - val_accuracy: 0.8515
Epoch 8/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3440 - accuracy: 0.8780 - val_loss: 0.4616 - val_accuracy: 0.8497
Epoch 9/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3459 - accuracy: 0.8763 - val_loss: 0.4758 - val_accuracy: 0.8496
Epoch 10/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3472 - accuracy: 0.8750 - val_loss: 0.4561 - val_accuracy: 0.8540
CPU times: total: 32.7 s Wall time: 23.4 s
The batch size is a hyperparameter that defines the number of samples to work through before updating the internal model parameters, instead of updating only after the whole dataset has been seen once. It is indeed better to update after training on each batch.
A smaller batch_size will take more time, though, since it means more parameter updates per epoch.
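The step counts in the logs above follow directly from this: with 40800 training examples, the number of parameter updates per epoch is ceil(40800 / batch_size).
import math

n_train = 40800                      # training examples after our split
for batch_size in (32, 64, 128):
    print(batch_size, math.ceil(n_train / batch_size))
# 32 -> 1275, 64 -> 638, 128 -> 319 updates per epoch, matching the progress bars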
# evaluate a model using k-fold cross-validation
def evaluate_model(model, dataX, dataY, n_folds=5, valX=X_val, valY=y_val):
    scores, histories = list(), list()
    # prepare cross validation
    kfold = KFold(n_folds, shuffle=True, random_state=1)
    # enumerate splits
    for train_ix, test_ix in kfold.split(dataX):
        # select rows for train and test
        trainX, trainY, testX, testY = dataX[train_ix], dataY[train_ix], dataX[test_ix], dataY[test_ix]
        # fit model
        history = model.fit(trainX, trainY, epochs=50, batch_size=128, validation_data=(testX, testY), verbose=0, callbacks=[EarlyStopping(monitor='val_loss', patience=10)])
        # evaluate model
        _, acc = model.evaluate(valX, valY, verbose=0)
        print('> %.3f' % (acc * 100.0))
        # append scores
        scores.append(acc)
        histories.append(history)
    return scores, histories
%%time
from keras.layers import BatchNormalization
fashion_model_batch = Sequential()
fashion_model_batch.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model_batch.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model_batch.add(BatchNormalization())
fashion_model_batch.add(Flatten())
fashion_model_batch.add(Dense(128, activation='linear'))
fashion_model_batch.add(Dense(num_classes, activation='softmax'))
fashion_model_batch.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model_batch.summary()
h_callback = fashion_model.fit(X_train, y_train, epochs=10, batch_size=128, validation_data=(X_test, y_test))  # note: this continues training the earlier fashion_model, not fashion_model_batch
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
Model: "sequential_8"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_9 (Conv2D) (None, 26, 26, 32) 320
conv2d_10 (Conv2D) (None, 24, 24, 64) 18496
batch_normalization (BatchN (None, 24, 24, 64) 256
ormalization)
flatten_5 (Flatten) (None, 36864) 0
dense_19 (Dense) (None, 128) 4718720
dense_20 (Dense) (None, 10) 1290
=================================================================
Total params: 4,739,082
Trainable params: 4,738,954
Non-trainable params: 128
_________________________________________________________________
Epoch 1/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3434 - accuracy: 0.8772 - val_loss: 0.4657 - val_accuracy: 0.8509
Epoch 2/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3437 - accuracy: 0.8770 - val_loss: 0.4669 - val_accuracy: 0.8482
Epoch 3/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3452 - accuracy: 0.8762 - val_loss: 0.4632 - val_accuracy: 0.8489
Epoch 4/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3444 - accuracy: 0.8772 - val_loss: 0.4654 - val_accuracy: 0.8487
Epoch 5/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3470 - accuracy: 0.8750 - val_loss: 0.4720 - val_accuracy: 0.8482
Epoch 6/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3440 - accuracy: 0.8775 - val_loss: 0.4734 - val_accuracy: 0.8455
Epoch 7/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3437 - accuracy: 0.8782 - val_loss: 0.4632 - val_accuracy: 0.8494
Epoch 8/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3466 - accuracy: 0.8747 - val_loss: 0.4813 - val_accuracy: 0.8478
Epoch 9/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3440 - accuracy: 0.8771 - val_loss: 0.4646 - val_accuracy: 0.8523
Epoch 10/10
319/319 [==============================] - 2s 7ms/step - loss: 0.3424 - accuracy: 0.8766 - val_loss: 0.4726 - val_accuracy: 0.8488
CPU times: total: 31.8 s Wall time: 23.2 s
## Importing data and augmenting it
data = tf.keras.datasets.fashion_mnist.load_data()
# split the data into train and test while keeping the class distribution balanced (stratified)
# split data into train, test and validation
X_train, X_test, y_train, y_test = train_test_split(data[0][0], data[0][1], test_size=0.2, random_state=42, stratify=data[0][1])
# 80% train, 20% test
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42, stratify=y_train)  # 0.15 x 0.8 = 0.12 = 12% of original data
X_train = X_train / 255.0
X_test = X_test / 255.0
X_val = X_val / 255.0
print('before',X_train.shape,X_test.shape)
print('before',y_train.shape,y_test.shape)
X_train = X_train.reshape((X_train.shape[0], 28, 28, 1))
X_test = X_test.reshape((X_test.shape[0], 28, 28, 1))
X_val = X_val.reshape((X_val.shape[0], 28, 28, 1))
y_test_label = y_test
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
y_val = to_categorical(y_val)
from keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=50,
    width_shift_range=0.01,
    height_shift_range=0.01,
    horizontal_flip=False,
    vertical_flip=False)
datagen.fit(X_train)
from matplotlib import pyplot as plt
gen = datagen.flow(X_train[1:2], batch_size=1)
for i in range(1, 6):
    plt.subplot(1, 5, i)
    plt.axis("off")
    plt.imshow(gen.next().squeeze())
plt.show()
print('after',X_train.shape,X_test.shape)
print('after',y_train.shape,y_test.shape)
before (40800, 28, 28) (12000, 28, 28)
before (40800,) (12000,)
after (40800, 28, 28, 1) (12000, 28, 28, 1)
after (40800, 10) (12000, 10)

from tensorflow.keras.layers import Conv2D,Flatten
from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.layers import Activation
from keras.layers import LeakyReLU
def model_relu():
    fashion_model = Sequential()
    fashion_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
    fashion_model.add(MaxPooling2D((2, 2)))
    fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Flatten())
    fashion_model.add(Dense(128, activation='relu'))
    fashion_model.add(Dense(num_classes, activation='softmax'))
    fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
    return fashion_model
run_test_harness(model_relu(),X_train, y_train)
> 88.208
> 87.236
> 89.000
> 88.667
> 88.486
Accuracy: mean=88.319 std=0.600, n=5
def model_tanh():
    fashion_model = Sequential()
    fashion_model.add(Conv2D(32, kernel_size=(3, 3), activation='tanh', input_shape=(28, 28, 1)))
    fashion_model.add(MaxPooling2D((2, 2)))
    fashion_model.add(Conv2D(64, (3, 3), activation='tanh'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Conv2D(128, (3, 3), activation='tanh'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Flatten())
    fashion_model.add(Dense(128, activation='linear'))
    fashion_model.add(Dense(num_classes, activation='softmax'))
    fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
    fashion_model.summary()
    return fashion_model
run_test_harness(model_tanh(),X_train, y_train)
Model: "sequential_10"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_14 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_4 (MaxPooling (None, 13, 13, 32) 0
2D)
conv2d_15 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_5 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_16 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_6 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten_7 (Flatten) (None, 128) 0
dense_23 (Dense) (None, 128) 16512
dense_24 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 88.250
> 88.264
> 88.236
> 86.944
> 88.625
Accuracy: mean=88.064 std=0.578, n=5
from keras.layers import LeakyReLU
# fix random seed for reproducibility
def model_leaky_relu():
    seed = 1
    np.random.seed(seed)
    fashion_model = Sequential()
    fashion_model.add(Conv2D(32, kernel_size=(3, 3), activation='linear', input_shape=(28, 28, 1), padding='same'))
    fashion_model.add(LeakyReLU(alpha=0.1))
    fashion_model.add(MaxPooling2D((2, 2), padding='same'))
    fashion_model.add(Conv2D(64, (3, 3), activation='linear', padding='same'))
    fashion_model.add(LeakyReLU(alpha=0.1))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
    fashion_model.add(Conv2D(128, (3, 3), activation='linear', padding='same'))
    fashion_model.add(LeakyReLU(alpha=0.1))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
    fashion_model.add(Flatten())
    fashion_model.add(Dense(128, activation='linear'))
    fashion_model.add(LeakyReLU(alpha=0.1))
    fashion_model.add(Dense(num_classes, activation='softmax'))
    fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
    fashion_model.summary()
    return fashion_model
run_test_harness(model_leaky_relu(),X_train, y_train)
Model: "sequential_11"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_17 (Conv2D) (None, 28, 28, 32) 320
leaky_re_lu (LeakyReLU) (None, 28, 28, 32) 0
max_pooling2d_7 (MaxPooling (None, 14, 14, 32) 0
2D)
conv2d_18 (Conv2D) (None, 14, 14, 64) 18496
leaky_re_lu_1 (LeakyReLU) (None, 14, 14, 64) 0
max_pooling2d_8 (MaxPooling (None, 7, 7, 64) 0
2D)
conv2d_19 (Conv2D) (None, 7, 7, 128) 73856
leaky_re_lu_2 (LeakyReLU) (None, 7, 7, 128) 0
max_pooling2d_9 (MaxPooling (None, 4, 4, 128) 0
2D)
flatten_8 (Flatten) (None, 2048) 0
dense_25 (Dense) (None, 128) 262272
leaky_re_lu_3 (LeakyReLU) (None, 128) 0
dense_26 (Dense) (None, 10) 1290
=================================================================
Total params: 356,234
Trainable params: 356,234
Non-trainable params: 0
_________________________________________________________________
> 91.444
> 91.514
> 91.556
> 91.875
> 91.861
Accuracy: mean=91.650 std=0.182, n=5
def model_selu():
    fashion_model = Sequential()
    fashion_model.add(Conv2D(32, kernel_size=(3, 3), activation='selu', input_shape=(28, 28, 1)))
    fashion_model.add(MaxPooling2D((2, 2)))
    fashion_model.add(Conv2D(64, (3, 3), activation='selu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Conv2D(128, (3, 3), activation='selu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Flatten())
    fashion_model.add(Dense(128, activation='selu'))
    fashion_model.add(Dense(num_classes, activation='softmax'))
    fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
    fashion_model.summary()
    return fashion_model
run_test_harness(model_selu(),X_train, y_train)
Model: "sequential_12"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_20 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_10 (MaxPoolin (None, 13, 13, 32) 0
g2D)
conv2d_21 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_11 (MaxPoolin (None, 5, 5, 64) 0
g2D)
conv2d_22 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_12 (MaxPoolin (None, 1, 1, 128) 0
g2D)
flatten_9 (Flatten) (None, 128) 0
dense_27 (Dense) (None, 128) 16512
dense_28 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 87.958
> 88.319
> 88.486
> 87.792
> 89.292
Accuracy: mean=88.369 std=0.523, n=5
def model_elu():
    fashion_model = Sequential()
    fashion_model.add(Conv2D(32, kernel_size=(3, 3), activation='elu', input_shape=(28, 28, 1)))
    fashion_model.add(MaxPooling2D((2, 2)))
    fashion_model.add(Conv2D(64, (3, 3), activation='elu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Conv2D(128, (3, 3), activation='elu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Flatten())
    fashion_model.add(Dense(128, activation='elu'))
    fashion_model.add(Dense(num_classes, activation='softmax'))
    fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
    fashion_model.summary()
    return fashion_model
run_test_harness(model_elu(),X_train, y_train)
Model: "sequential_13"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_23 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_13 (MaxPoolin (None, 13, 13, 32) 0
g2D)
conv2d_24 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_14 (MaxPoolin (None, 5, 5, 64) 0
g2D)
conv2d_25 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_15 (MaxPoolin (None, 1, 1, 128) 0
g2D)
flatten_10 (Flatten) (None, 128) 0
dense_29 (Dense) (None, 128) 16512
dense_30 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 88.903
> 88.889
> 88.319
> 88.528
> 87.764
Accuracy: mean=88.481 std=0.421, n=5
Dropout randomly removes neurons in a layer during training, but when predicting, the neurons are added back in. How can something random help in AI, we may wonder...

In the picture above, if a bad node is blocked, it helps the training process. However, if a good node is blocked, the neural network can still learn from a separate representation of the data; not the best one, but still fine.
As our accuracy goes above 90 percent we may tend to overfit, so here we will try dropout to see if it can regularize the model and perhaps even improve validation scores.
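A minimal sketch of the mechanism (inverted dropout, which is what keras' Dropout layer applies at training time; the rate and input array are illustrative):
import numpy as np

rng = np.random.default_rng(1)

def dropout(activations, rate=0.2, training=True):
    if not training:
        return activations                        # at prediction time every neuron is kept
    mask = rng.random(activations.shape) >= rate  # randomly block a fraction of neurons
    return activations * mask / (1.0 - rate)      # rescale survivors to keep the expected sum

a = np.ones(10)
print(dropout(a))                    # roughly 2 of 10 units zeroed, survivors scaled to 1.25
print(dropout(a, training=False))    # unchanged at inference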
def model_dropout():
    fashion_model = Sequential()
    fashion_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
    fashion_model.add(MaxPooling2D((2, 2)))
    fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Flatten())
    fashion_model.add(Dense(128, activation='relu'))
    fashion_model.add(Dropout(0.2))
    fashion_model.add(Dense(num_classes, activation='softmax'))
    fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
    fashion_model.summary()
    return fashion_model
run_test_harness(model_dropout(),X_train, y_train)
Model: "sequential_14"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_26 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_16 (MaxPoolin (None, 13, 13, 32) 0
g2D)
conv2d_27 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_17 (MaxPoolin (None, 5, 5, 64) 0
g2D)
conv2d_28 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_18 (MaxPoolin (None, 1, 1, 128) 0
g2D)
flatten_11 (Flatten) (None, 128) 0
dense_31 (Dense) (None, 128) 16512
dropout (Dropout) (None, 128) 0
dense_32 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 89.167
> 89.458
> 88.542
> 89.306
> 89.153
Accuracy: mean=89.125 std=0.312, n=5
Adding dropout does improve performance.

Instead of computing the gradients over the entire dataset, SGD performs a parameter update for each example in the dataset. The problem with SGD is that the updates are frequent and have high variance, so the objective function fluctuates heavily during training. This fluctuation can be an advantage over batch gradient descent, because it allows the function to jump to better local minima, but at the same time it can be a disadvantage for convergence to a specific local minimum.
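A minimal sketch of the update rule (the gradients here are synthetic noise around a true direction, just to show the jitter):
import numpy as np

def sgd_step(w, grad, lr=0.01):
    # one update per gradient estimate; noisy estimates -> a fluctuating objective
    return w - lr * grad

w = np.zeros(3)
for grad in np.random.default_rng(0).normal(1.0, 0.5, size=(5, 3)):
    w = sgd_step(w, grad)
print(w)   # drifts in the true gradient direction, with visible step-to-step jitter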
def model_sgd():
    fashion_model = Sequential()
    fashion_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
    fashion_model.add(MaxPooling2D((2, 2)))
    fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Flatten())
    fashion_model.add(Dense(128, activation='relu'))
    fashion_model.add(Dropout(0.2))
    fashion_model.add(Dense(num_classes, activation='softmax'))
    fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.SGD(), metrics=['accuracy'])
    fashion_model.summary()
    return fashion_model
run_test_harness(model_sgd(),X_train, y_train)
Model: "sequential_15"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_29 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_19 (MaxPoolin (None, 13, 13, 32) 0
g2D)
conv2d_30 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_20 (MaxPoolin (None, 5, 5, 64) 0
g2D)
conv2d_31 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_21 (MaxPoolin (None, 1, 1, 128) 0
g2D)
flatten_12 (Flatten) (None, 128) 0
dense_33 (Dense) (None, 128) 16512
dropout_1 (Dropout) (None, 128) 0
dense_34 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 84.542
> 86.764
> 86.708
> 87.278
> 86.972
Accuracy: mean=86.453 std=0.976, n=5
Adagrad adapts the learning rate to the parameters, performing small updates for frequently occurring features and large updates for the rarest ones. In this way the network can capture information belonging to infrequent features, putting them in evidence and giving them the right weight. The problem with Adagrad is that it adjusts the learning rate for each parameter according to all the past gradients, so after a high number of steps the accumulation of all the past gradients can leave a very small learning rate. If the learning rate is too small, we simply can't update the weights, and as a consequence the network doesn't learn anymore.
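A minimal sketch of the Adagrad update, showing how the accumulated squared gradients shrink the effective learning rate over time (constant gradients are used purely for illustration):
import numpy as np

def adagrad_step(w, grad, cache, lr=0.01, eps=1e-8):
    cache = cache + grad ** 2                   # accumulates ALL past squared gradients
    w = w - lr * grad / (np.sqrt(cache) + eps)  # per-parameter adaptive step
    return w, cache

w, cache = np.zeros(1), np.zeros(1)
for _ in range(1000):
    w, cache = adagrad_step(w, np.ones(1), cache)
print(0.01 / np.sqrt(cache))   # effective rate after 1000 steps: ~0.0003 and still shrinking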
def model_adagrad():
    fashion_model = Sequential()
    fashion_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
    fashion_model.add(MaxPooling2D((2, 2)))
    fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Flatten())
    fashion_model.add(Dense(128, activation='relu'))
    fashion_model.add(Dropout(0.2))
    fashion_model.add(Dense(num_classes, activation='softmax'))
    fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adagrad(), metrics=['accuracy'])
    fashion_model.summary()
    return fashion_model
run_test_harness(model_adagrad(),X_train, y_train)
Model: "sequential_16"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_32 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_22 (MaxPoolin (None, 13, 13, 32) 0
g2D)
conv2d_33 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_23 (MaxPoolin (None, 5, 5, 64) 0
g2D)
conv2d_34 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_24 (MaxPoolin (None, 1, 1, 128) 0
g2D)
flatten_13 (Flatten) (None, 128) 0
dense_35 (Dense) (None, 128) 16512
dropout_2 (Dropout) (None, 128) 0
dense_36 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 76.778
> 79.917
> 80.875
> 82.139
> 82.806
Accuracy: mean=80.503 std=2.114, n=5
Adadelta improves on the previous algorithm by introducing a history window, which fixes the number of past gradients taken into consideration during training. In this way we don't have the vanishing learning rate problem.
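A minimal sketch of the Adadelta update, assuming the usual decaying-average formulation (the decaying average acts like the sliding window over past gradients described above):
import numpy as np

def adadelta_step(w, grad, avg_sq_grad, avg_sq_delta, rho=0.95, eps=1e-6):
    avg_sq_grad = rho * avg_sq_grad + (1 - rho) * grad ** 2         # windowed gradient history
    delta = -np.sqrt(avg_sq_delta + eps) / np.sqrt(avg_sq_grad + eps) * grad
    avg_sq_delta = rho * avg_sq_delta + (1 - rho) * delta ** 2      # windowed update history
    return w + delta, avg_sq_grad, avg_sq_delta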
def model_adadelta():
    fashion_model = Sequential()
    fashion_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
    fashion_model.add(MaxPooling2D((2, 2)))
    fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Flatten())
    fashion_model.add(Dense(128, activation='relu'))
    fashion_model.add(Dropout(0.2))
    fashion_model.add(Dense(num_classes, activation='softmax'))
    fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(), metrics=['accuracy'])
    fashion_model.summary()
    return fashion_model
run_test_harness(model_adadelta(),X_train, y_train)
Model: "sequential_17"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_35 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_25 (MaxPoolin (None, 13, 13, 32) 0
g2D)
conv2d_36 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_26 (MaxPoolin (None, 5, 5, 64) 0
g2D)
conv2d_37 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_27 (MaxPoolin (None, 1, 1, 128) 0
g2D)
flatten_14 (Flatten) (None, 128) 0
dense_37 (Dense) (None, 128) 16512
dropout_3 (Dropout) (None, 128) 0
dense_38 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 56.389
> 67.097
> 71.264
> 73.375
> 74.736
Accuracy: mean=68.572 std=6.617, n=5
RMSprop is very similar to Adadelta. The main difference is in how the two manage the past gradients: RMSprop keeps a decaying average of squared gradients but still uses an explicit global learning rate.
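A minimal NumPy sketch of one RMSprop step for comparison (rmsprop_step is an illustrative name):
import numpy as np
def rmsprop_step(w, grad, eg2, lr=0.001, rho=0.9, eps=1e-7):
    eg2 = rho * eg2 + (1 - rho) * grad ** 2  # decaying average instead of a full history
    return w - lr * grad / (np.sqrt(eg2) + eps), eg2
w, eg2 = rmsprop_step(np.zeros(3), np.array([1.0, 0.1, 0.0]), np.zeros(3))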
def model_rmsprop():
    fashion_model = Sequential()
    fashion_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28,28,1)))
    fashion_model.add(MaxPooling2D((2, 2)))
    fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Flatten())
    fashion_model.add(Dense(128, activation='relu'))
    fashion_model.add(Dropout(0.2))
    fashion_model.add(Dense(num_classes, activation='softmax'))
    fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.RMSprop(), metrics=['accuracy'])
    fashion_model.summary()
    return fashion_model
run_test_harness(model_rmsprop(), X_train, y_train)
Model: "sequential_18"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_38 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_28 (MaxPoolin (None, 13, 13, 32) 0
g2D)
conv2d_39 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_29 (MaxPoolin (None, 5, 5, 64) 0
g2D)
conv2d_40 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_30 (MaxPoolin (None, 1, 1, 128) 0
g2D)
flatten_15 (Flatten) (None, 128) 0
dense_39 (Dense) (None, 128) 16512
dropout_4 (Dropout) (None, 128) 0
dense_40 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 89.097
> 89.861
> 88.972
> 88.833
> 89.292
Accuracy: mean=89.211 std=0.358, n=5
Adam adds, on top of the advantages of Adadelta and RMSprop, the storage of an exponentially decaying average of past gradients, similar to momentum.
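A minimal NumPy sketch of one Adam step showing both decaying averages (adam_step is an illustrative name; t is the step count starting at 1):
import numpy as np
def adam_step(w, grad, m, v, t, lr=0.001, b1=0.9, b2=0.999, eps=1e-7):
    m = b1 * m + (1 - b1) * grad         # decaying average of gradients (momentum-like)
    v = b2 * v + (1 - b2) * grad ** 2    # decaying average of squared gradients (RMSprop-like)
    m_hat = m / (1 - b1 ** t)            # bias correction for the first few steps
    v_hat = v / (1 - b2 ** t)
    return w - lr * m_hat / (np.sqrt(v_hat) + eps), m, v
w, m, v = adam_step(np.zeros(3), np.array([1.0, 0.1, 0.0]), np.zeros(3), np.zeros(3), t=1)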
def model_adam():
    fashion_model = Sequential()
    fashion_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28,28,1)))
    fashion_model.add(MaxPooling2D((2, 2)))
    fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
    fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
    fashion_model.add(Flatten())
    fashion_model.add(Dense(128, activation='relu'))
    fashion_model.add(Dropout(0.2))
    fashion_model.add(Dense(num_classes, activation='softmax'))
    fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
    fashion_model.summary()
    return fashion_model
run_test_harness(model_adam(), X_train, y_train)
Model: "sequential_19"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_41 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_31 (MaxPoolin (None, 13, 13, 32) 0
g2D)
conv2d_42 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_32 (MaxPoolin (None, 5, 5, 64) 0
g2D)
conv2d_43 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_33 (MaxPoolin (None, 1, 1, 128) 0
g2D)
flatten_16 (Flatten) (None, 128) 0
dense_41 (Dense) (None, 128) 16512
dropout_5 (Dropout) (None, 128) 0
dense_42 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 89.306
> 88.625
> 88.611
> 89.181
> 88.875
Accuracy: mean=88.919 std=0.283, n=5
Next, a reduce-learning-rate-on-plateau callback is added, the batch size is increased to shorten compute time, and more epochs are allowed (with early stopping) so we can compare the performance of the models.

Reducing the learning rate as val_loss plateaus allows the optimizer to home in on the minimum of the loss surface more efficiently.
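As a rough sketch of the rule the callback applies (illustrative values and function name, not the Keras source):
def plateau_lr(lr, stalled_epochs, patience=5, factor=0.2, min_lr=1e-5):
    # once val_loss has not improved for `patience` epochs, shrink the learning rate
    if stalled_epochs >= patience:
        return max(lr * factor, min_lr)  # factor < 1; never drop below min_lr
    return lr
Note that the cells below set min_lr=0.001, which is also Adam's default learning rate, so in practice the rate never actually decreases there (the logs show lr: 0.0010 throughout).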
from keras.callbacks import ReduceLROnPlateau
### function for looping through all the models
def tune_model_act_opt(act, opt):
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3),
                     activation=act,
                     kernel_initializer='he_normal',
                     input_shape=(28,28,1)))
    model.add(MaxPooling2D((2, 2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))
    model.add(Conv2D(64, (3, 3), activation=act))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Conv2D(128, (3, 3), activation=act))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Flatten())
    model.add(Dense(128, activation=act))
    model.add(Dropout(0.3))
    model.add(Flatten())  # redundant second Flatten: the tensor is already flat here, so this is a no-op
    model.add(Dense(10, activation='softmax'))
    # compile model
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt, metrics=['accuracy'])
    return model
# plot diagnostic learning curves
def summarize_diagnostics(histories, act, opt):
    # plot loss
    pyplot.subplot(211)
    pyplot.title(f'Cross Entropy Loss {act},{opt}')
    pyplot.plot(histories.history['loss'], color='blue', label='train')
    pyplot.plot(histories.history['val_loss'], color='orange', label='test')
    # plot accuracy
    pyplot.subplot(212)
    pyplot.title(f'Classification Accuracy {act},{opt}')
    pyplot.plot(histories.history['accuracy'], color='blue', label='train')
    pyplot.plot(histories.history['val_accuracy'], color='orange', label='test')
    pyplot.legend()
    pyplot.show()
def run_test_harness_act_opt(act, opt, X_train, y_train, X_test, y_test, X_val, y_val):
    model = tune_model_act_opt(act, opt)
    # fit model
    h_callback = EarlyStopping(monitor='val_accuracy', patience=5)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                  patience=5, min_lr=0.001)
    history = model.fit(X_train, y_train, epochs=100, batch_size=128, validation_data=(X_val, y_val), verbose=0, callbacks=[h_callback, reduce_lr])
    # evaluate model
    _, acc = model.evaluate(X_test, y_test, verbose=0)
    print('> %.3f' % (acc * 100.0))
    summarize_diagnostics(history, act, opt)
    return (acc * 100)
from itertools import product
optimizers = ['adam', 'sgd', 'rmsprop', 'adagrad', 'adadelta', 'adamax', 'nadam']
activation = ['relu', 'selu', 'elu', 'tanh',keras.layers.LeakyReLU(alpha=0.01)]
scores_act_opt = list()
activation_names = ['relu', 'selu', 'elu', 'tanh','LeakyReLU']
act_opt = list(product(activation_names,optimizers))
for act, opt in product(activation, optimizers):
    acc = run_test_harness_act_opt(act, opt, X_train, y_train, X_test, y_test, X_val, y_val)
    scores_act_opt.append(acc)
> 92.467
> 89.658
> 92.250
> 86.658
> 79.542
> 92.442
> 92.108
> 91.183
> 87.242
> 91.817
> 85.125
> 78.992
> 92.167
> 90.833
> 92.075
> 90.258
> 91.750
> 84.575
> 79.875
> 92.042
> 92.575
> 91.092
> 88.283
> 90.483
> 85.100
> 80.008
> 91.625
> 91.192
> 92.983
> 90.375
> 91.958
> 87.133
> 79.100
> 92.442
> 92.550
# create a dataframe of scores_act_opt and act_opt
df_act_opt = pd.DataFrame({'act_opt':act_opt,'scores_act_opt':scores_act_opt})
# highlight highest score_act_opt in df_act_opt
df_act_opt.style.highlight_max(subset=['scores_act_opt'],color='green', axis=0)
# style only score_act_opt in df_act_opt
| act_opt | scores_act_opt | |
|---|---|---|
| 0 | ('relu', 'adam') | 92.466664 |
| 1 | ('relu', 'sgd') | 89.658332 |
| 2 | ('relu', 'rmsprop') | 92.250001 |
| 3 | ('relu', 'adagrad') | 86.658335 |
| 4 | ('relu', 'adadelta') | 79.541665 |
| 5 | ('relu', 'adamax') | 92.441666 |
| 6 | ('relu', 'nadam') | 92.108333 |
| 7 | ('selu', 'adam') | 91.183335 |
| 8 | ('selu', 'sgd') | 87.241668 |
| 9 | ('selu', 'rmsprop') | 91.816664 |
| 10 | ('selu', 'adagrad') | 85.124999 |
| 11 | ('selu', 'adadelta') | 78.991669 |
| 12 | ('selu', 'adamax') | 92.166668 |
| 13 | ('selu', 'nadam') | 90.833336 |
| 14 | ('elu', 'adam') | 92.075002 |
| 15 | ('elu', 'sgd') | 90.258336 |
| 16 | ('elu', 'rmsprop') | 91.750002 |
| 17 | ('elu', 'adagrad') | 84.574997 |
| 18 | ('elu', 'adadelta') | 79.874998 |
| 19 | ('elu', 'adamax') | 92.041665 |
| 20 | ('elu', 'nadam') | 92.575002 |
| 21 | ('tanh', 'adam') | 91.091669 |
| 22 | ('tanh', 'sgd') | 88.283336 |
| 23 | ('tanh', 'rmsprop') | 90.483332 |
| 24 | ('tanh', 'adagrad') | 85.100001 |
| 25 | ('tanh', 'adadelta') | 80.008334 |
| 26 | ('tanh', 'adamax') | 91.624999 |
| 27 | ('tanh', 'nadam') | 91.191667 |
| 28 | ('LeakyReLU', 'adam') | 92.983335 |
| 29 | ('LeakyReLU', 'sgd') | 90.375000 |
| 30 | ('LeakyReLU', 'rmsprop') | 91.958332 |
| 31 | ('LeakyReLU', 'adagrad') | 87.133336 |
| 32 | ('LeakyReLU', 'adadelta') | 79.100001 |
| 33 | ('LeakyReLU', 'adamax') | 92.441666 |
| 34 | ('LeakyReLU', 'nadam') | 92.549998 |
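From the table, LeakyReLU paired with Adam gives the highest test accuracy (about 92.98%), so the next model is built around LeakyReLU activations and the Adam optimizer.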
seed = 1
np.random.seed(seed)
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D((2, 2),padding='same'))
fashion_model.add(Conv2D(64, (3, 3), activation='linear',padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
fashion_model.add(Conv2D(128, (3, 3), activation='linear',padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='linear'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
# fit model
h_callback = EarlyStopping(monitor='val_accuracy', patience=10)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
history = fashion_model.fit(X_train, y_train, epochs=100, batch_size=128,validation_data=(X_val, y_val), verbose=0 ,callbacks=[h_callback,reduce_lr])
# evaluate model
_, acc = fashion_model.evaluate(X_test, y_test, verbose=0)
print('> %.3f' % (acc * 100.0))
# learning curves (note: act and opt here still hold the last values from the tuning loop above)
summarize_diagnostics(history, act, opt)
Model: "sequential_55"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_149 (Conv2D) (None, 28, 28, 32) 320
leaky_re_lu_5 (LeakyReLU) (None, 28, 28, 32) 0
max_pooling2d_104 (MaxPooli (None, 14, 14, 32) 0
ng2D)
conv2d_150 (Conv2D) (None, 14, 14, 64) 18496
leaky_re_lu_6 (LeakyReLU) (None, 14, 14, 64) 0
max_pooling2d_105 (MaxPooli (None, 7, 7, 64) 0
ng2D)
conv2d_151 (Conv2D) (None, 7, 7, 128) 73856
leaky_re_lu_7 (LeakyReLU) (None, 7, 7, 128) 0
max_pooling2d_106 (MaxPooli (None, 4, 4, 128) 0
ng2D)
flatten_87 (Flatten) (None, 2048) 0
dense_113 (Dense) (None, 128) 262272
leaky_re_lu_8 (LeakyReLU) (None, 128) 0
dense_114 (Dense) (None, 10) 1290
=================================================================
Total params: 356,234
Trainable params: 356,234
Non-trainable params: 0
_________________________________________________________________
> 92.483

Graph 1. Model with a good fit and high variance. Source: https://www.researchgate.net/publication/332412613
We would prefer a good fit instead: noisy data can otherwise be memorized, preventing the model from generalizing and from predicting well on real-world data.
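The next three cells attach a weight penalty (L1, L2, then combined L1L2) to the output layer. Conceptually the penalty is just an extra term added to the loss; here is a minimal NumPy sketch of the idea (penalized_loss is an illustrative name, not Keras internals):
import numpy as np
def penalized_loss(data_loss, weights, l1=0.0, l2=0.0):
    # cross-entropy plus optional L1/L2 weight penalties, as in keras.regularizers
    return data_loss + l1 * np.abs(weights).sum() + l2 * np.square(weights).sum()
w = np.array([0.5, -0.2, 0.0])
print(penalized_loss(1.0, w, l1=0.05))  # L1 pushes weights toward exact zeros
print(penalized_loss(1.0, w, l2=0.05))  # L2 shrinks weights smoothly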
import tensorflow
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
kernel_initializer='he_normal',
input_shape=(28,28,1)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(10, activation='softmax', kernel_regularizer=tensorflow.keras.regularizers.L1(l1=0.05)))
# compile with the Adam optimizer
model.compile(optimizer='adam',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
h_callback = model.fit(X_train, y_train, epochs = 100,
validation_data=(X_val, y_val), callbacks=[early_stopping,reduce_lr])
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
Model: "sequential_56"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_152 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_107 (MaxPooli (None, 13, 13, 32) 0
ng2D)
dropout_146 (Dropout) (None, 13, 13, 32) 0
conv2d_153 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_108 (MaxPooli (None, 5, 5, 64) 0
ng2D)
dropout_147 (Dropout) (None, 5, 5, 64) 0
conv2d_154 (Conv2D) (None, 3, 3, 128) 73856
dropout_148 (Dropout) (None, 3, 3, 128) 0
flatten_88 (Flatten) (None, 1152) 0
dense_115 (Dense) (None, 128) 147584
dropout_149 (Dropout) (None, 128) 0
flatten_89 (Flatten) (None, 128) 0
dense_116 (Dense) (None, 10) 1290
=================================================================
Total params: 241,546
Trainable params: 241,546
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
1275/1275 [==============================] - 8s 5ms/step - loss: 1.4461 - accuracy: 0.7104 - val_loss: 0.6739 - val_accuracy: 0.8061 - lr: 0.0010
Epoch 2/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.7220 - accuracy: 0.7864 - val_loss: 0.6123 - val_accuracy: 0.8138 - lr: 0.0010
Epoch 3/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.6550 - accuracy: 0.8093 - val_loss: 0.5658 - val_accuracy: 0.8490 - lr: 0.0010
Epoch 4/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.6100 - accuracy: 0.8290 - val_loss: 0.4906 - val_accuracy: 0.8707 - lr: 0.0010
Epoch 5/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.5685 - accuracy: 0.8435 - val_loss: 0.4727 - val_accuracy: 0.8717 - lr: 0.0010
Epoch 6/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.5415 - accuracy: 0.8525 - val_loss: 0.4328 - val_accuracy: 0.8863 - lr: 0.0010
Epoch 7/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.5315 - accuracy: 0.8563 - val_loss: 0.4392 - val_accuracy: 0.8843 - lr: 0.0010
Epoch 8/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.5094 - accuracy: 0.8651 - val_loss: 0.4450 - val_accuracy: 0.8792 - lr: 0.0010
Epoch 9/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4925 - accuracy: 0.8700 - val_loss: 0.4259 - val_accuracy: 0.8956 - lr: 0.0010
Epoch 10/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4813 - accuracy: 0.8748 - val_loss: 0.4008 - val_accuracy: 0.9019 - lr: 0.0010
Epoch 11/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4769 - accuracy: 0.8765 - val_loss: 0.4272 - val_accuracy: 0.8908 - lr: 0.0010
Epoch 12/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4656 - accuracy: 0.8804 - val_loss: 0.3960 - val_accuracy: 0.8956 - lr: 0.0010
Epoch 13/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4573 - accuracy: 0.8817 - val_loss: 0.4175 - val_accuracy: 0.8867 - lr: 0.0010
Epoch 14/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4521 - accuracy: 0.8825 - val_loss: 0.3863 - val_accuracy: 0.9026 - lr: 0.0010
Epoch 15/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4432 - accuracy: 0.8874 - val_loss: 0.3769 - val_accuracy: 0.9075 - lr: 0.0010
Epoch 16/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4380 - accuracy: 0.8875 - val_loss: 0.3842 - val_accuracy: 0.9026 - lr: 0.0010
Epoch 17/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4369 - accuracy: 0.8896 - val_loss: 0.3989 - val_accuracy: 0.9013 - lr: 0.0010
Epoch 18/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4315 - accuracy: 0.8928 - val_loss: 0.3802 - val_accuracy: 0.9069 - lr: 0.0010
Epoch 19/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4330 - accuracy: 0.8916 - val_loss: 0.3901 - val_accuracy: 0.9057 - lr: 0.0010
Epoch 20/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4186 - accuracy: 0.8945 - val_loss: 0.3669 - val_accuracy: 0.9112 - lr: 0.0010
Epoch 21/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4200 - accuracy: 0.8944 - val_loss: 0.3628 - val_accuracy: 0.9103 - lr: 0.0010
Epoch 22/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4212 - accuracy: 0.8941 - val_loss: 0.3823 - val_accuracy: 0.9046 - lr: 0.0010
Epoch 23/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4184 - accuracy: 0.8961 - val_loss: 0.3657 - val_accuracy: 0.9093 - lr: 0.0010
Epoch 24/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4165 - accuracy: 0.8953 - val_loss: 0.3511 - val_accuracy: 0.9074 - lr: 0.0010
Epoch 25/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4064 - accuracy: 0.8982 - val_loss: 0.3808 - val_accuracy: 0.9051 - lr: 0.0010
Epoch 26/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4065 - accuracy: 0.8979 - val_loss: 0.3606 - val_accuracy: 0.9122 - lr: 0.0010
Epoch 27/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3985 - accuracy: 0.9016 - val_loss: 0.3741 - val_accuracy: 0.9061 - lr: 0.0010
Epoch 28/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4036 - accuracy: 0.9003 - val_loss: 0.3781 - val_accuracy: 0.9119 - lr: 0.0010
Epoch 29/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3962 - accuracy: 0.9019 - val_loss: 0.3673 - val_accuracy: 0.9112 - lr: 0.0010
Epoch 30/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3930 - accuracy: 0.9032 - val_loss: 0.3678 - val_accuracy: 0.9111 - lr: 0.0010
Epoch 31/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3933 - accuracy: 0.9040 - val_loss: 0.3629 - val_accuracy: 0.9114 - lr: 0.0010
Epoch 32/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3942 - accuracy: 0.9023 - val_loss: 0.3781 - val_accuracy: 0.9103 - lr: 0.0010
Epoch 33/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3876 - accuracy: 0.9041 - val_loss: 0.3588 - val_accuracy: 0.9117 - lr: 0.0010
Epoch 34/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3949 - accuracy: 0.9026 - val_loss: 0.3846 - val_accuracy: 0.9112 - lr: 0.0010
Epoch 34: early stopping
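The same architecture is now trained with an L2 penalty on the output layer in place of L1.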
import tensorflow
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
kernel_initializer='he_normal',
input_shape=(28,28,1)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(10, activation='softmax', kernel_regularizer=tensorflow.keras.regularizers.L2(l2=0.05)))
# compile with the Adam optimizer
model.compile(optimizer='adam',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
h_callback = model.fit(X_train, y_train, epochs = 100,
validation_data=(X_val, y_val), callbacks=[early_stopping,reduce_lr])
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
Model: "sequential_57"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_155 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_109 (MaxPooli (None, 13, 13, 32) 0
ng2D)
dropout_150 (Dropout) (None, 13, 13, 32) 0
conv2d_156 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_110 (MaxPooli (None, 5, 5, 64) 0
ng2D)
dropout_151 (Dropout) (None, 5, 5, 64) 0
conv2d_157 (Conv2D) (None, 3, 3, 128) 73856
dropout_152 (Dropout) (None, 3, 3, 128) 0
flatten_90 (Flatten) (None, 1152) 0
dense_117 (Dense) (None, 128) 147584
dropout_153 (Dropout) (None, 128) 0
flatten_91 (Flatten) (None, 128) 0
dense_118 (Dense) (None, 10) 1290
=================================================================
Total params: 241,546
Trainable params: 241,546
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.8246 - accuracy: 0.7481 - val_loss: 0.4826 - val_accuracy: 0.8432 - lr: 0.0010
Epoch 2/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.5009 - accuracy: 0.8314 - val_loss: 0.3962 - val_accuracy: 0.8724 - lr: 0.0010
Epoch 3/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4407 - accuracy: 0.8528 - val_loss: 0.3537 - val_accuracy: 0.8792 - lr: 0.0010
Epoch 4/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3985 - accuracy: 0.8660 - val_loss: 0.3261 - val_accuracy: 0.8913 - lr: 0.0010
Epoch 5/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3787 - accuracy: 0.8737 - val_loss: 0.3087 - val_accuracy: 0.8964 - lr: 0.0010
Epoch 6/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3541 - accuracy: 0.8809 - val_loss: 0.2978 - val_accuracy: 0.8982 - lr: 0.0010
Epoch 7/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3432 - accuracy: 0.8836 - val_loss: 0.2887 - val_accuracy: 0.9039 - lr: 0.0010
Epoch 8/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3340 - accuracy: 0.8886 - val_loss: 0.2856 - val_accuracy: 0.9017 - lr: 0.0010
Epoch 9/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3298 - accuracy: 0.8890 - val_loss: 0.2723 - val_accuracy: 0.9082 - lr: 0.0010
Epoch 10/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3218 - accuracy: 0.8921 - val_loss: 0.2769 - val_accuracy: 0.9047 - lr: 0.0010
Epoch 11/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3139 - accuracy: 0.8937 - val_loss: 0.2675 - val_accuracy: 0.9100 - lr: 0.0010
Epoch 12/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3064 - accuracy: 0.8976 - val_loss: 0.2795 - val_accuracy: 0.9049 - lr: 0.0010
Epoch 13/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3026 - accuracy: 0.8982 - val_loss: 0.2535 - val_accuracy: 0.9165 - lr: 0.0010
Epoch 14/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2920 - accuracy: 0.9020 - val_loss: 0.2552 - val_accuracy: 0.9149 - lr: 0.0010
Epoch 15/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2899 - accuracy: 0.9028 - val_loss: 0.2528 - val_accuracy: 0.9164 - lr: 0.0010
Epoch 16/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2871 - accuracy: 0.9044 - val_loss: 0.2671 - val_accuracy: 0.9112 - lr: 0.0010
Epoch 17/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2806 - accuracy: 0.9056 - val_loss: 0.2589 - val_accuracy: 0.9131 - lr: 0.0010
Epoch 18/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2770 - accuracy: 0.9073 - val_loss: 0.2537 - val_accuracy: 0.9169 - lr: 0.0010
Epoch 19/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2824 - accuracy: 0.9053 - val_loss: 0.2555 - val_accuracy: 0.9150 - lr: 0.0010
Epoch 20/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2752 - accuracy: 0.9093 - val_loss: 0.2448 - val_accuracy: 0.9190 - lr: 0.0010
Epoch 21/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2731 - accuracy: 0.9081 - val_loss: 0.2555 - val_accuracy: 0.9156 - lr: 0.0010
Epoch 22/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2682 - accuracy: 0.9097 - val_loss: 0.2537 - val_accuracy: 0.9172 - lr: 0.0010
Epoch 23/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2676 - accuracy: 0.9108 - val_loss: 0.2434 - val_accuracy: 0.9201 - lr: 0.0010
Epoch 24/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2679 - accuracy: 0.9107 - val_loss: 0.2452 - val_accuracy: 0.9200 - lr: 0.0010
Epoch 25/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2609 - accuracy: 0.9134 - val_loss: 0.2620 - val_accuracy: 0.9135 - lr: 0.0010
Epoch 26/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2601 - accuracy: 0.9123 - val_loss: 0.2546 - val_accuracy: 0.9150 - lr: 0.0010
Epoch 27/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2605 - accuracy: 0.9133 - val_loss: 0.2497 - val_accuracy: 0.9167 - lr: 0.0010
Epoch 28/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2546 - accuracy: 0.9152 - val_loss: 0.2484 - val_accuracy: 0.9181 - lr: 0.0010
Epoch 29/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2552 - accuracy: 0.9156 - val_loss: 0.2534 - val_accuracy: 0.9144 - lr: 0.0010
Epoch 30/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2557 - accuracy: 0.9140 - val_loss: 0.2456 - val_accuracy: 0.9169 - lr: 0.0010
Epoch 31/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2522 - accuracy: 0.9154 - val_loss: 0.2431 - val_accuracy: 0.9200 - lr: 0.0010
Epoch 32/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2495 - accuracy: 0.9160 - val_loss: 0.2382 - val_accuracy: 0.9207 - lr: 0.0010
Epoch 33/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2473 - accuracy: 0.9175 - val_loss: 0.2493 - val_accuracy: 0.9165 - lr: 0.0010
Epoch 34/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2456 - accuracy: 0.9173 - val_loss: 0.2482 - val_accuracy: 0.9175 - lr: 0.0010
Epoch 35/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2497 - accuracy: 0.9167 - val_loss: 0.2442 - val_accuracy: 0.9172 - lr: 0.0010
Epoch 36/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2491 - accuracy: 0.9157 - val_loss: 0.2442 - val_accuracy: 0.9175 - lr: 0.0010
Epoch 37/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2429 - accuracy: 0.9170 - val_loss: 0.2495 - val_accuracy: 0.9168 - lr: 0.0010
Epoch 38/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2417 - accuracy: 0.9186 - val_loss: 0.2553 - val_accuracy: 0.9132 - lr: 0.0010
Epoch 39/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2401 - accuracy: 0.9193 - val_loss: 0.2410 - val_accuracy: 0.9199 - lr: 0.0010
Epoch 40/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2429 - accuracy: 0.9190 - val_loss: 0.2445 - val_accuracy: 0.9232 - lr: 0.0010
Epoch 41/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2444 - accuracy: 0.9178 - val_loss: 0.2429 - val_accuracy: 0.9199 - lr: 0.0010
Epoch 42/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2383 - accuracy: 0.9197 - val_loss: 0.2378 - val_accuracy: 0.9207 - lr: 0.0010
Epoch 43/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2399 - accuracy: 0.9214 - val_loss: 0.2581 - val_accuracy: 0.9142 - lr: 0.0010
Epoch 44/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2348 - accuracy: 0.9209 - val_loss: 0.2530 - val_accuracy: 0.9147 - lr: 0.0010
Epoch 45/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2364 - accuracy: 0.9216 - val_loss: 0.2426 - val_accuracy: 0.9218 - lr: 0.0010
Epoch 46/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2361 - accuracy: 0.9211 - val_loss: 0.2408 - val_accuracy: 0.9204 - lr: 0.0010
Epoch 47/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2325 - accuracy: 0.9229 - val_loss: 0.2358 - val_accuracy: 0.9214 - lr: 0.0010
Epoch 48/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2333 - accuracy: 0.9219 - val_loss: 0.2478 - val_accuracy: 0.9233 - lr: 0.0010
Epoch 49/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2345 - accuracy: 0.9228 - val_loss: 0.2464 - val_accuracy: 0.9179 - lr: 0.0010
Epoch 50/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2333 - accuracy: 0.9233 - val_loss: 0.2402 - val_accuracy: 0.9197 - lr: 0.0010
Epoch 51/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2308 - accuracy: 0.9222 - val_loss: 0.2450 - val_accuracy: 0.9194 - lr: 0.0010
Epoch 52/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2311 - accuracy: 0.9215 - val_loss: 0.2454 - val_accuracy: 0.9192 - lr: 0.0010
Epoch 53/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2319 - accuracy: 0.9208 - val_loss: 0.2407 - val_accuracy: 0.9200 - lr: 0.0010
Epoch 54/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2250 - accuracy: 0.9250 - val_loss: 0.2362 - val_accuracy: 0.9228 - lr: 0.0010
Epoch 55/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2306 - accuracy: 0.9227 - val_loss: 0.2590 - val_accuracy: 0.9169 - lr: 0.0010
Epoch 56/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2280 - accuracy: 0.9238 - val_loss: 0.2541 - val_accuracy: 0.9193 - lr: 0.0010
Epoch 57/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2306 - accuracy: 0.9245 - val_loss: 0.2408 - val_accuracy: 0.9192 - lr: 0.0010
Epoch 57: early stopping
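Finally, the same architecture is trained with the combined L1L2 penalty on the output layer.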
import tensorflow
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
kernel_initializer='he_normal',
input_shape=(28,28,1)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(10, activation='softmax', kernel_regularizer=tensorflow.keras.regularizers.L1L2(l1=0.01, l2=0.01)))
# compile with the Adam optimizer
model.compile(optimizer='adam',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
h_callback = model.fit(X_train, y_train, epochs = 100,
validation_data=(X_val, y_val), callbacks=[early_stopping,reduce_lr])
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
Model: "sequential_58"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_158 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_111 (MaxPooli (None, 13, 13, 32) 0
ng2D)
dropout_154 (Dropout) (None, 13, 13, 32) 0
conv2d_159 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_112 (MaxPooli (None, 5, 5, 64) 0
ng2D)
dropout_155 (Dropout) (None, 5, 5, 64) 0
conv2d_160 (Conv2D) (None, 3, 3, 128) 73856
dropout_156 (Dropout) (None, 3, 3, 128) 0
flatten_92 (Flatten) (None, 1152) 0
dense_119 (Dense) (None, 128) 147584
dropout_157 (Dropout) (None, 128) 0
flatten_93 (Flatten) (None, 128) 0
dense_120 (Dense) (None, 10) 1290
=================================================================
Total params: 241,546
Trainable params: 241,546
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
1275/1275 [==============================] - 8s 6ms/step - loss: 0.9849 - accuracy: 0.7448 - val_loss: 0.5601 - val_accuracy: 0.8464 - lr: 0.0010
Epoch 2/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.5785 - accuracy: 0.8284 - val_loss: 0.4511 - val_accuracy: 0.8700 - lr: 0.0010
Epoch 3/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.5069 - accuracy: 0.8484 - val_loss: 0.4080 - val_accuracy: 0.8821 - lr: 0.0010
Epoch 4/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4700 - accuracy: 0.8612 - val_loss: 0.3916 - val_accuracy: 0.8856 - lr: 0.0010
Epoch 5/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4449 - accuracy: 0.8678 - val_loss: 0.3777 - val_accuracy: 0.8894 - lr: 0.0010
Epoch 6/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4275 - accuracy: 0.8739 - val_loss: 0.3850 - val_accuracy: 0.8838 - lr: 0.0010
Epoch 7/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.4152 - accuracy: 0.8767 - val_loss: 0.3551 - val_accuracy: 0.8922 - lr: 0.0010
Epoch 8/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3999 - accuracy: 0.8816 - val_loss: 0.3389 - val_accuracy: 0.9001 - lr: 0.0010
Epoch 9/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3958 - accuracy: 0.8838 - val_loss: 0.3404 - val_accuracy: 0.9015 - lr: 0.0010
Epoch 10/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3848 - accuracy: 0.8864 - val_loss: 0.3446 - val_accuracy: 0.8982 - lr: 0.0010
Epoch 11/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3739 - accuracy: 0.8892 - val_loss: 0.3198 - val_accuracy: 0.9053 - lr: 0.0010
Epoch 12/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3669 - accuracy: 0.8918 - val_loss: 0.3321 - val_accuracy: 0.9026 - lr: 0.0010
Epoch 13/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3648 - accuracy: 0.8926 - val_loss: 0.3198 - val_accuracy: 0.9056 - lr: 0.0010
Epoch 14/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3542 - accuracy: 0.8962 - val_loss: 0.3079 - val_accuracy: 0.9096 - lr: 0.0010
Epoch 15/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3508 - accuracy: 0.8984 - val_loss: 0.3092 - val_accuracy: 0.9108 - lr: 0.0010
Epoch 16/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3484 - accuracy: 0.8973 - val_loss: 0.3034 - val_accuracy: 0.9119 - lr: 0.0010
Epoch 17/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3403 - accuracy: 0.9006 - val_loss: 0.3054 - val_accuracy: 0.9121 - lr: 0.0010
Epoch 18/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3377 - accuracy: 0.8998 - val_loss: 0.3230 - val_accuracy: 0.9047 - lr: 0.0010
Epoch 19/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3377 - accuracy: 0.9003 - val_loss: 0.3085 - val_accuracy: 0.9089 - lr: 0.0010
Epoch 20/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3310 - accuracy: 0.9038 - val_loss: 0.3106 - val_accuracy: 0.9089 - lr: 0.0010
Epoch 21/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3288 - accuracy: 0.9054 - val_loss: 0.3084 - val_accuracy: 0.9112 - lr: 0.0010
Epoch 22/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3327 - accuracy: 0.9044 - val_loss: 0.2956 - val_accuracy: 0.9169 - lr: 0.0010
Epoch 23/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3262 - accuracy: 0.9036 - val_loss: 0.3067 - val_accuracy: 0.9099 - lr: 0.0010
Epoch 24/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3238 - accuracy: 0.9053 - val_loss: 0.2956 - val_accuracy: 0.9151 - lr: 0.0010
Epoch 25/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3215 - accuracy: 0.9076 - val_loss: 0.3329 - val_accuracy: 0.9038 - lr: 0.0010
Epoch 26/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3165 - accuracy: 0.9080 - val_loss: 0.2926 - val_accuracy: 0.9106 - lr: 0.0010
Epoch 27/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3187 - accuracy: 0.9075 - val_loss: 0.3010 - val_accuracy: 0.9133 - lr: 0.0010
Epoch 28/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3145 - accuracy: 0.9075 - val_loss: 0.2992 - val_accuracy: 0.9169 - lr: 0.0010
Epoch 29/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3110 - accuracy: 0.9099 - val_loss: 0.3068 - val_accuracy: 0.9133 - lr: 0.0010
Epoch 30/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3118 - accuracy: 0.9089 - val_loss: 0.2931 - val_accuracy: 0.9149 - lr: 0.0010
Epoch 31/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3103 - accuracy: 0.9101 - val_loss: 0.2962 - val_accuracy: 0.9157 - lr: 0.0010
Epoch 32/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3040 - accuracy: 0.9117 - val_loss: 0.2922 - val_accuracy: 0.9167 - lr: 0.0010
Epoch 33/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3038 - accuracy: 0.9104 - val_loss: 0.2923 - val_accuracy: 0.9167 - lr: 0.0010
Epoch 34/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2988 - accuracy: 0.9143 - val_loss: 0.2869 - val_accuracy: 0.9156 - lr: 0.0010
Epoch 35/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2992 - accuracy: 0.9129 - val_loss: 0.2907 - val_accuracy: 0.9182 - lr: 0.0010
Epoch 36/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.3024 - accuracy: 0.9132 - val_loss: 0.2853 - val_accuracy: 0.9201 - lr: 0.0010
Epoch 37/100
1275/1275 [==============================] - 7s 6ms/step - loss: 0.3010 - accuracy: 0.9142 - val_loss: 0.2869 - val_accuracy: 0.9157 - lr: 0.0010
Epoch 38/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2992 - accuracy: 0.9141 - val_loss: 0.2939 - val_accuracy: 0.9183 - lr: 0.0010
Epoch 39/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2949 - accuracy: 0.9147 - val_loss: 0.2918 - val_accuracy: 0.9174 - lr: 0.0010
Epoch 40/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2971 - accuracy: 0.9148 - val_loss: 0.2940 - val_accuracy: 0.9156 - lr: 0.0010
Epoch 41/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2945 - accuracy: 0.9149 - val_loss: 0.3016 - val_accuracy: 0.9162 - lr: 0.0010
Epoch 42/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2960 - accuracy: 0.9152 - val_loss: 0.2921 - val_accuracy: 0.9190 - lr: 0.0010
Epoch 43/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2893 - accuracy: 0.9163 - val_loss: 0.2980 - val_accuracy: 0.9194 - lr: 0.0010
Epoch 44/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2928 - accuracy: 0.9149 - val_loss: 0.2871 - val_accuracy: 0.9181 - lr: 0.0010
Epoch 45/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2929 - accuracy: 0.9157 - val_loss: 0.2846 - val_accuracy: 0.9179 - lr: 0.0010
Epoch 46/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2897 - accuracy: 0.9179 - val_loss: 0.2883 - val_accuracy: 0.9165 - lr: 0.0010
Epoch 47/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2908 - accuracy: 0.9168 - val_loss: 0.2787 - val_accuracy: 0.9221 - lr: 0.0010
Epoch 48/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2876 - accuracy: 0.9165 - val_loss: 0.2886 - val_accuracy: 0.9185 - lr: 0.0010
Epoch 49/100
1275/1275 [==============================] - 7s 5ms/step - loss: 0.2825 - accuracy: 0.9182 - val_loss: 0.2932 - val_accuracy: 0.9154 - lr: 0.0010
Epoch 50/100
1275/1275 [==============================] - 7s 6ms/step - loss: 0.2885 - accuracy: 0.9181 - val_loss: 0.2943 - val_accuracy: 0.9161 - lr: 0.0010
Epoch 51/100
1275/1275 [==============================] - 8s 6ms/step - loss: 0.2841 - accuracy: 0.9203 - val_loss: 0.2902 - val_accuracy: 0.9183 - lr: 0.0010
Epoch 52/100
1275/1275 [==============================] - 8s 6ms/step - loss: 0.2816 - accuracy: 0.9198 - val_loss: 0.3069 - val_accuracy: 0.9167 - lr: 0.0010
Epoch 53/100
1275/1275 [==============================] - 7s 6ms/step - loss: 0.2822 - accuracy: 0.9188 - val_loss: 0.3071 - val_accuracy: 0.9149 - lr: 0.0010
Epoch 54/100
1275/1275 [==============================] - 7s 6ms/step - loss: 0.2809 - accuracy: 0.9214 - val_loss: 0.3010 - val_accuracy: 0.9181 - lr: 0.0010
Epoch 55/100
1275/1275 [==============================] - 8s 6ms/step - loss: 0.2826 - accuracy: 0.9195 - val_loss: 0.2907 - val_accuracy: 0.9197 - lr: 0.0010
Epoch 56/100
1275/1275 [==============================] - 8s 6ms/step - loss: 0.2835 - accuracy: 0.9194 - val_loss: 0.2956 - val_accuracy: 0.9179 - lr: 0.0010
Epoch 57/100
1275/1275 [==============================] - 8s 6ms/step - loss: 0.2812 - accuracy: 0.9212 - val_loss: 0.3199 - val_accuracy: 0.9090 - lr: 0.0010
Epoch 57: early stopping

from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(10, activation='softmax'))
# compile model
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
# fit model
h_callback = model.fit(X_train, y_train, epochs = 10,
validation_data=(X_val, y_val))
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
# Plot train vs test accuracy during training
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
Epoch 1/10
1275/1275 [==============================] - 5s 4ms/step - loss: 0.4752 - accuracy: 0.8292 - val_loss: 0.3593 - val_accuracy: 0.8694
Epoch 2/10
1275/1275 [==============================] - 5s 4ms/step - loss: 0.3144 - accuracy: 0.8866 - val_loss: 0.2907 - val_accuracy: 0.8913
Epoch 3/10
1275/1275 [==============================] - 5s 4ms/step - loss: 0.2668 - accuracy: 0.9020 - val_loss: 0.2999 - val_accuracy: 0.8886
Epoch 4/10
1275/1275 [==============================] - 5s 4ms/step - loss: 0.2361 - accuracy: 0.9118 - val_loss: 0.2592 - val_accuracy: 0.9044
Epoch 5/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.2114 - accuracy: 0.9208 - val_loss: 0.2612 - val_accuracy: 0.9097
Epoch 6/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.1908 - accuracy: 0.9290 - val_loss: 0.2649 - val_accuracy: 0.9064
Epoch 7/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.1704 - accuracy: 0.9364 - val_loss: 0.2635 - val_accuracy: 0.9083
Epoch 8/10
1275/1275 [==============================] - 5s 4ms/step - loss: 0.1557 - accuracy: 0.9418 - val_loss: 0.2672 - val_accuracy: 0.9108
Epoch 9/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.1370 - accuracy: 0.9496 - val_loss: 0.2688 - val_accuracy: 0.9119
Epoch 10/10
1275/1275 [==============================] - 4s 3ms/step - loss: 0.1261 - accuracy: 0.9523 - val_loss: 0.2757 - val_accuracy: 0.9126
pred = model.predict(X_test)
pred = np.argmax(pred, axis=1)
classification_matrix = confusion_matrix(y_test_label, pred)
# plot confusion matrix
plt.figure(figsize=(10,10))
sns.heatmap(classification_matrix, annot=True, fmt="d")
plt.title("Confusion matrix")
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
375/375 [==============================] - 1s 1ms/step
Having explored the various features of CNNs, I will now attempt to beat the Machine Learning Mastery baseline model.
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
final_model = Sequential()
final_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',kernel_initializer='he_normal',input_shape=(28,28,1)))
final_model.add(LeakyReLU(alpha=0.1))
final_model.add(MaxPooling2D((2, 2)))
final_model.add(BatchNormalization())
final_model.add(Dropout(0.25))
final_model.add(Conv2D(64, (3, 3), activation='linear'))
final_model.add(LeakyReLU(alpha=0.1))
final_model.add(BatchNormalization())
final_model.add(MaxPooling2D(pool_size=(2, 2)))
final_model.add(Dropout(0.25))
final_model.add(Conv2D(128, (3, 3), activation='linear'))
final_model.add(LeakyReLU(alpha=0.1))
final_model.add(BatchNormalization())
final_model.add(Dropout(0.4))
final_model.add(Flatten())
final_model.add(Dense(128, activation=act))  # note: act still holds the last value from the activation loop above (LeakyReLU)
final_model.add(Dropout(0.3))
final_model.add(Flatten())  # redundant second Flatten: the tensor is already flat here
final_model.add(Dense(10, activation='softmax', kernel_regularizer=tensorflow.keras.regularizers.L1L2(l1=0.01, l2=0.01)))
final_model.compile(optimizer='adam',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
final_model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=20, verbose=1)
mc = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
h_callback = final_model.fit(X_train, y_train, epochs = 100,
validation_data=(X_val, y_val), callbacks=[early_stopping,mc,reduce_lr] , batch_size=64)
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
saved_model = load_model('best_model.h5')
# evaluate the model
_, train_acc = saved_model.evaluate(X_train, y_train, verbose=0)
_, test_acc = saved_model.evaluate(X_test, y_test, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))
Model: "sequential_62"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_168 (Conv2D) (None, 26, 26, 32) 320
leaky_re_lu_15 (LeakyReLU) (None, 26, 26, 32) 0
max_pooling2d_118 (MaxPooli (None, 13, 13, 32) 0
ng2D)
batch_normalization_112 (Ba (None, 13, 13, 32) 128
tchNormalization)
dropout_166 (Dropout) (None, 13, 13, 32) 0
conv2d_169 (Conv2D) (None, 11, 11, 64) 18496
leaky_re_lu_16 (LeakyReLU) (None, 11, 11, 64) 0
batch_normalization_113 (Ba (None, 11, 11, 64) 256
tchNormalization)
max_pooling2d_119 (MaxPooli (None, 5, 5, 64) 0
ng2D)
dropout_167 (Dropout) (None, 5, 5, 64) 0
conv2d_170 (Conv2D) (None, 3, 3, 128) 73856
leaky_re_lu_17 (LeakyReLU) (None, 3, 3, 128) 0
batch_normalization_114 (Ba (None, 3, 3, 128) 512
tchNormalization)
dropout_168 (Dropout) (None, 3, 3, 128) 0
flatten_99 (Flatten) (None, 1152) 0
dense_127 (Dense) (None, 128) 147584
dropout_169 (Dropout) (None, 128) 0
flatten_100 (Flatten) (None, 128) 0
dense_128 (Dense) (None, 10) 1290
=================================================================
Total params: 242,442
Trainable params: 241,994
Non-trainable params: 448
_________________________________________________________________
Epoch 1/100
633/638 [============================>.] - ETA: 0s - loss: 1.1730 - accuracy: 0.7756
Epoch 1: val_accuracy improved from -inf to 0.84944, saving model to best_model.h5
638/638 [==============================] - 6s 8ms/step - loss: 1.1691 - accuracy: 0.7759 - val_loss: 0.5713 - val_accuracy: 0.8494 - lr: 0.0010
Epoch 2/100
637/638 [============================>.] - ETA: 0s - loss: 0.5789 - accuracy: 0.8407
Epoch 2: val_accuracy improved from 0.84944 to 0.87903, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.5790 - accuracy: 0.8406 - val_loss: 0.4495 - val_accuracy: 0.8790 - lr: 0.0010
Epoch 3/100
632/638 [============================>.] - ETA: 0s - loss: 0.5013 - accuracy: 0.8588
Epoch 3: val_accuracy improved from 0.87903 to 0.88986, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.5006 - accuracy: 0.8589 - val_loss: 0.3984 - val_accuracy: 0.8899 - lr: 0.0010
Epoch 4/100
638/638 [==============================] - ETA: 0s - loss: 0.4538 - accuracy: 0.8708
Epoch 4: val_accuracy improved from 0.88986 to 0.89681, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.4538 - accuracy: 0.8708 - val_loss: 0.3722 - val_accuracy: 0.8968 - lr: 0.0010
Epoch 5/100
638/638 [==============================] - ETA: 0s - loss: 0.4285 - accuracy: 0.8766
Epoch 5: val_accuracy improved from 0.89681 to 0.89792, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.4285 - accuracy: 0.8766 - val_loss: 0.3529 - val_accuracy: 0.8979 - lr: 0.0010
Epoch 6/100
633/638 [============================>.] - ETA: 0s - loss: 0.4084 - accuracy: 0.8821
Epoch 6: val_accuracy improved from 0.89792 to 0.89944, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.4079 - accuracy: 0.8824 - val_loss: 0.3512 - val_accuracy: 0.8994 - lr: 0.0010
Epoch 7/100
638/638 [==============================] - ETA: 0s - loss: 0.3974 - accuracy: 0.8850
Epoch 7: val_accuracy improved from 0.89944 to 0.90139, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.3974 - accuracy: 0.8850 - val_loss: 0.3465 - val_accuracy: 0.9014 - lr: 0.0010
Epoch 8/100
631/638 [============================>.] - ETA: 0s - loss: 0.3797 - accuracy: 0.8899
Epoch 8: val_accuracy did not improve from 0.90139
638/638 [==============================] - 5s 8ms/step - loss: 0.3792 - accuracy: 0.8900 - val_loss: 0.3443 - val_accuracy: 0.8982 - lr: 0.0010
Epoch 9/100
634/638 [============================>.] - ETA: 0s - loss: 0.3703 - accuracy: 0.8924
Epoch 9: val_accuracy did not improve from 0.90139
638/638 [==============================] - 5s 8ms/step - loss: 0.3703 - accuracy: 0.8924 - val_loss: 0.3558 - val_accuracy: 0.8900 - lr: 0.0010
Epoch 10/100
633/638 [============================>.] - ETA: 0s - loss: 0.3629 - accuracy: 0.8952
Epoch 10: val_accuracy improved from 0.90139 to 0.90792, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.3631 - accuracy: 0.8950 - val_loss: 0.3169 - val_accuracy: 0.9079 - lr: 0.0010
Epoch 11/100
631/638 [============================>.] - ETA: 0s - loss: 0.3562 - accuracy: 0.8960
Epoch 11: val_accuracy did not improve from 0.90792
638/638 [==============================] - 5s 8ms/step - loss: 0.3558 - accuracy: 0.8961 - val_loss: 0.3340 - val_accuracy: 0.8989 - lr: 0.0010
Epoch 12/100
637/638 [============================>.] - ETA: 0s - loss: 0.3484 - accuracy: 0.8980
Epoch 12: val_accuracy improved from 0.90792 to 0.91014, saving model to best_model.h5
638/638 [==============================] - 5s 7ms/step - loss: 0.3483 - accuracy: 0.8980 - val_loss: 0.3187 - val_accuracy: 0.9101 - lr: 0.0010
Epoch 13/100
636/638 [============================>.] - ETA: 0s - loss: 0.3443 - accuracy: 0.9007
Epoch 13: val_accuracy did not improve from 0.91014
638/638 [==============================] - 5s 8ms/step - loss: 0.3444 - accuracy: 0.9006 - val_loss: 0.3054 - val_accuracy: 0.9086 - lr: 0.0010
Epoch 14/100
635/638 [============================>.] - ETA: 0s - loss: 0.3383 - accuracy: 0.9015
Epoch 14: val_accuracy did not improve from 0.91014
638/638 [==============================] - 5s 8ms/step - loss: 0.3381 - accuracy: 0.9016 - val_loss: 0.3314 - val_accuracy: 0.9079 - lr: 0.0010
Epoch 15/100
636/638 [============================>.] - ETA: 0s - loss: 0.3338 - accuracy: 0.9044
Epoch 15: val_accuracy improved from 0.91014 to 0.91083, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.3336 - accuracy: 0.9045 - val_loss: 0.3015 - val_accuracy: 0.9108 - lr: 0.0010
Epoch 16/100
632/638 [============================>.] - ETA: 0s - loss: 0.3260 - accuracy: 0.9050
Epoch 16: val_accuracy did not improve from 0.91083
638/638 [==============================] - 5s 8ms/step - loss: 0.3263 - accuracy: 0.9050 - val_loss: 0.3298 - val_accuracy: 0.9090 - lr: 0.0010
Epoch 17/100
633/638 [============================>.] - ETA: 0s - loss: 0.3261 - accuracy: 0.9046
Epoch 17: val_accuracy improved from 0.91083 to 0.91278, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.3261 - accuracy: 0.9046 - val_loss: 0.2962 - val_accuracy: 0.9128 - lr: 0.0010
Epoch 18/100
634/638 [============================>.] - ETA: 0s - loss: 0.3212 - accuracy: 0.9072
Epoch 18: val_accuracy improved from 0.91278 to 0.91569, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.3215 - accuracy: 0.9070 - val_loss: 0.2934 - val_accuracy: 0.9157 - lr: 0.0010
Epoch 19/100
631/638 [============================>.] - ETA: 0s - loss: 0.3149 - accuracy: 0.9085
Epoch 19: val_accuracy improved from 0.91569 to 0.91597, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.3145 - accuracy: 0.9087 - val_loss: 0.2970 - val_accuracy: 0.9160 - lr: 0.0010
Epoch 20/100
635/638 [============================>.] - ETA: 0s - loss: 0.3125 - accuracy: 0.9106
Epoch 20: val_accuracy did not improve from 0.91597
638/638 [==============================] - 5s 7ms/step - loss: 0.3130 - accuracy: 0.9103 - val_loss: 0.2928 - val_accuracy: 0.9150 - lr: 0.0010
Epoch 21/100
633/638 [============================>.] - ETA: 0s - loss: 0.3067 - accuracy: 0.9108
Epoch 21: val_accuracy did not improve from 0.91597
638/638 [==============================] - 5s 7ms/step - loss: 0.3067 - accuracy: 0.9109 - val_loss: 0.2957 - val_accuracy: 0.9140 - lr: 0.0010
Epoch 22/100
637/638 [============================>.] - ETA: 0s - loss: 0.3031 - accuracy: 0.9116
Epoch 22: val_accuracy did not improve from 0.91597
638/638 [==============================] - 5s 7ms/step - loss: 0.3031 - accuracy: 0.9116 - val_loss: 0.3024 - val_accuracy: 0.9114 - lr: 0.0010
Epoch 23/100
638/638 [==============================] - ETA: 0s - loss: 0.3035 - accuracy: 0.9126
Epoch 23: val_accuracy did not improve from 0.91597
638/638 [==============================] - 5s 7ms/step - loss: 0.3035 - accuracy: 0.9126 - val_loss: 0.2958 - val_accuracy: 0.9154 - lr: 0.0010
Epoch 24/100
636/638 [============================>.] - ETA: 0s - loss: 0.3006 - accuracy: 0.9136
Epoch 24: val_accuracy improved from 0.91597 to 0.91792, saving model to best_model.h5
638/638 [==============================] - 5s 7ms/step - loss: 0.3010 - accuracy: 0.9136 - val_loss: 0.2892 - val_accuracy: 0.9179 - lr: 0.0010
Epoch 25/100
638/638 [==============================] - ETA: 0s - loss: 0.2979 - accuracy: 0.9138
Epoch 25: val_accuracy did not improve from 0.91792
638/638 [==============================] - 5s 8ms/step - loss: 0.2979 - accuracy: 0.9138 - val_loss: 0.3030 - val_accuracy: 0.9107 - lr: 0.0010
Epoch 26/100
634/638 [============================>.] - ETA: 0s - loss: 0.3003 - accuracy: 0.9135
Epoch 26: val_accuracy did not improve from 0.91792
638/638 [==============================] - 5s 7ms/step - loss: 0.3001 - accuracy: 0.9137 - val_loss: 0.2921 - val_accuracy: 0.9169 - lr: 0.0010
Epoch 27/100
634/638 [============================>.] - ETA: 0s - loss: 0.2933 - accuracy: 0.9147
Epoch 27: val_accuracy improved from 0.91792 to 0.91833, saving model to best_model.h5
638/638 [==============================] - 5s 7ms/step - loss: 0.2930 - accuracy: 0.9147 - val_loss: 0.2880 - val_accuracy: 0.9183 - lr: 0.0010
Epoch 28/100
638/638 [==============================] - ETA: 0s - loss: 0.2933 - accuracy: 0.9144
Epoch 28: val_accuracy did not improve from 0.91833
638/638 [==============================] - 5s 7ms/step - loss: 0.2933 - accuracy: 0.9144 - val_loss: 0.3054 - val_accuracy: 0.9131 - lr: 0.0010
Epoch 29/100
633/638 [============================>.] - ETA: 0s - loss: 0.2898 - accuracy: 0.9173
Epoch 29: val_accuracy did not improve from 0.91833
638/638 [==============================] - 5s 7ms/step - loss: 0.2902 - accuracy: 0.9172 - val_loss: 0.2900 - val_accuracy: 0.9161 - lr: 0.0010
Epoch 30/100
635/638 [============================>.] - ETA: 0s - loss: 0.2871 - accuracy: 0.9182
Epoch 30: val_accuracy did not improve from 0.91833
638/638 [==============================] - 5s 8ms/step - loss: 0.2871 - accuracy: 0.9182 - val_loss: 0.3042 - val_accuracy: 0.9133 - lr: 0.0010
Epoch 31/100
638/638 [==============================] - ETA: 0s - loss: 0.2845 - accuracy: 0.9175
Epoch 31: val_accuracy did not improve from 0.91833
638/638 [==============================] - 5s 8ms/step - loss: 0.2845 - accuracy: 0.9175 - val_loss: 0.2935 - val_accuracy: 0.9126 - lr: 0.0010
Epoch 32/100
636/638 [============================>.] - ETA: 0s - loss: 0.2827 - accuracy: 0.9189
Epoch 32: val_accuracy did not improve from 0.91833
638/638 [==============================] - 5s 8ms/step - loss: 0.2826 - accuracy: 0.9189 - val_loss: 0.3002 - val_accuracy: 0.9137 - lr: 0.0010
Epoch 33/100
633/638 [============================>.] - ETA: 0s - loss: 0.2829 - accuracy: 0.9187
Epoch 33: val_accuracy did not improve from 0.91833
638/638 [==============================] - 5s 8ms/step - loss: 0.2831 - accuracy: 0.9186 - val_loss: 0.3013 - val_accuracy: 0.9118 - lr: 0.0010
Epoch 34/100
634/638 [============================>.] - ETA: 0s - loss: 0.2824 - accuracy: 0.9178
Epoch 34: val_accuracy did not improve from 0.91833
638/638 [==============================] - 5s 8ms/step - loss: 0.2818 - accuracy: 0.9180 - val_loss: 0.2865 - val_accuracy: 0.9162 - lr: 0.0010
Epoch 35/100
638/638 [==============================] - ETA: 0s - loss: 0.2806 - accuracy: 0.9195
Epoch 35: val_accuracy did not improve from 0.91833
638/638 [==============================] - 5s 8ms/step - loss: 0.2806 - accuracy: 0.9195 - val_loss: 0.2838 - val_accuracy: 0.9174 - lr: 0.0010
Epoch 36/100
633/638 [============================>.] - ETA: 0s - loss: 0.2788 - accuracy: 0.9206
Epoch 36: val_accuracy did not improve from 0.91833
638/638 [==============================] - 5s 8ms/step - loss: 0.2788 - accuracy: 0.9206 - val_loss: 0.2916 - val_accuracy: 0.9167 - lr: 0.0010
Epoch 37/100
634/638 [============================>.] - ETA: 0s - loss: 0.2786 - accuracy: 0.9201
Epoch 37: val_accuracy improved from 0.91833 to 0.92208, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.2787 - accuracy: 0.9200 - val_loss: 0.2789 - val_accuracy: 0.9221 - lr: 0.0010
Epoch 38/100
634/638 [============================>.] - ETA: 0s - loss: 0.2752 - accuracy: 0.9208
Epoch 38: val_accuracy did not improve from 0.92208
638/638 [==============================] - 5s 8ms/step - loss: 0.2754 - accuracy: 0.9207 - val_loss: 0.3005 - val_accuracy: 0.9153 - lr: 0.0010
Epoch 39/100
638/638 [==============================] - ETA: 0s - loss: 0.2742 - accuracy: 0.9219
Epoch 39: val_accuracy did not improve from 0.92208
638/638 [==============================] - 5s 8ms/step - loss: 0.2742 - accuracy: 0.9219 - val_loss: 0.3021 - val_accuracy: 0.9132 - lr: 0.0010
Epoch 40/100
638/638 [==============================] - ETA: 0s - loss: 0.2742 - accuracy: 0.9239
Epoch 40: val_accuracy did not improve from 0.92208
638/638 [==============================] - 5s 8ms/step - loss: 0.2742 - accuracy: 0.9239 - val_loss: 0.2911 - val_accuracy: 0.9221 - lr: 0.0010
Epoch 41/100
636/638 [============================>.] - ETA: 0s - loss: 0.2702 - accuracy: 0.9220
Epoch 41: val_accuracy did not improve from 0.92208
638/638 [==============================] - 5s 8ms/step - loss: 0.2704 - accuracy: 0.9220 - val_loss: 0.2936 - val_accuracy: 0.9131 - lr: 0.0010
Epoch 42/100
633/638 [============================>.] - ETA: 0s - loss: 0.2717 - accuracy: 0.9224
Epoch 42: val_accuracy did not improve from 0.92208
638/638 [==============================] - 5s 8ms/step - loss: 0.2713 - accuracy: 0.9225 - val_loss: 0.2981 - val_accuracy: 0.9178 - lr: 0.0010
Epoch 43/100
631/638 [============================>.] - ETA: 0s - loss: 0.2702 - accuracy: 0.9226
Epoch 43: val_accuracy did not improve from 0.92208
638/638 [==============================] - 5s 8ms/step - loss: 0.2704 - accuracy: 0.9226 - val_loss: 0.2885 - val_accuracy: 0.9190 - lr: 0.0010
Epoch 44/100
636/638 [============================>.] - ETA: 0s - loss: 0.2694 - accuracy: 0.9228
Epoch 44: val_accuracy did not improve from 0.92208
638/638 [==============================] - 5s 8ms/step - loss: 0.2693 - accuracy: 0.9228 - val_loss: 0.2985 - val_accuracy: 0.9187 - lr: 0.0010
Epoch 45/100
638/638 [==============================] - ETA: 0s - loss: 0.2640 - accuracy: 0.9249
Epoch 45: val_accuracy did not improve from 0.92208
638/638 [==============================] - 5s 8ms/step - loss: 0.2640 - accuracy: 0.9249 - val_loss: 0.2872 - val_accuracy: 0.9200 - lr: 0.0010
Epoch 46/100
631/638 [============================>.] - ETA: 0s - loss: 0.2670 - accuracy: 0.9238
Epoch 46: val_accuracy did not improve from 0.92208
638/638 [==============================] - 5s 8ms/step - loss: 0.2665 - accuracy: 0.9241 - val_loss: 0.3029 - val_accuracy: 0.9176 - lr: 0.0010
Epoch 47/100
634/638 [============================>.] - ETA: 0s - loss: 0.2669 - accuracy: 0.9238
Epoch 47: val_accuracy did not improve from 0.92208
638/638 [==============================] - 5s 8ms/step - loss: 0.2667 - accuracy: 0.9238 - val_loss: 0.3104 - val_accuracy: 0.9139 - lr: 0.0010
Epoch 48/100
637/638 [============================>.] - ETA: 0s - loss: 0.2690 - accuracy: 0.9236
Epoch 48: val_accuracy improved from 0.92208 to 0.92222, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.2690 - accuracy: 0.9236 - val_loss: 0.2793 - val_accuracy: 0.9222 - lr: 0.0010
Epoch 49/100
637/638 [============================>.] - ETA: 0s - loss: 0.2615 - accuracy: 0.9258
Epoch 49: val_accuracy did not improve from 0.92222
638/638 [==============================] - 5s 8ms/step - loss: 0.2616 - accuracy: 0.9258 - val_loss: 0.2804 - val_accuracy: 0.9214 - lr: 0.0010
Epoch 50/100
638/638 [==============================] - ETA: 0s - loss: 0.2666 - accuracy: 0.9252
Epoch 50: val_accuracy did not improve from 0.92222
638/638 [==============================] - 5s 8ms/step - loss: 0.2666 - accuracy: 0.9252 - val_loss: 0.2794 - val_accuracy: 0.9211 - lr: 0.0010
Epoch 51/100
633/638 [============================>.] - ETA: 0s - loss: 0.2641 - accuracy: 0.9252
Epoch 51: val_accuracy did not improve from 0.92222
638/638 [==============================] - 5s 8ms/step - loss: 0.2641 - accuracy: 0.9252 - val_loss: 0.3096 - val_accuracy: 0.9106 - lr: 0.0010
Epoch 52/100
637/638 [============================>.] - ETA: 0s - loss: 0.2637 - accuracy: 0.9238
Epoch 52: val_accuracy did not improve from 0.92222
638/638 [==============================] - 5s 8ms/step - loss: 0.2637 - accuracy: 0.9237 - val_loss: 0.3016 - val_accuracy: 0.9156 - lr: 0.0010
Epoch 53/100
633/638 [============================>.] - ETA: 0s - loss: 0.2652 - accuracy: 0.9249
Epoch 53: val_accuracy improved from 0.92222 to 0.92319, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.2651 - accuracy: 0.9249 - val_loss: 0.2852 - val_accuracy: 0.9232 - lr: 0.0010
Epoch 54/100
635/638 [============================>.] - ETA: 0s - loss: 0.2601 - accuracy: 0.9265
Epoch 54: val_accuracy improved from 0.92319 to 0.92389, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.2599 - accuracy: 0.9265 - val_loss: 0.2870 - val_accuracy: 0.9239 - lr: 0.0010
Epoch 55/100
637/638 [============================>.] - ETA: 0s - loss: 0.2610 - accuracy: 0.9275
Epoch 55: val_accuracy did not improve from 0.92389
638/638 [==============================] - 5s 8ms/step - loss: 0.2610 - accuracy: 0.9275 - val_loss: 0.2932 - val_accuracy: 0.9175 - lr: 0.0010
Epoch 56/100
636/638 [============================>.] - ETA: 0s - loss: 0.2581 - accuracy: 0.9264
Epoch 56: val_accuracy did not improve from 0.92389
638/638 [==============================] - 5s 8ms/step - loss: 0.2582 - accuracy: 0.9263 - val_loss: 0.2779 - val_accuracy: 0.9232 - lr: 0.0010
Epoch 57/100
633/638 [============================>.] - ETA: 0s - loss: 0.2588 - accuracy: 0.9266
Epoch 57: val_accuracy did not improve from 0.92389
638/638 [==============================] - 5s 8ms/step - loss: 0.2589 - accuracy: 0.9265 - val_loss: 0.2855 - val_accuracy: 0.9229 - lr: 0.0010
Epoch 58/100
634/638 [============================>.] - ETA: 0s - loss: 0.2589 - accuracy: 0.9268
Epoch 58: val_accuracy did not improve from 0.92389
638/638 [==============================] - 5s 8ms/step - loss: 0.2589 - accuracy: 0.9268 - val_loss: 0.2877 - val_accuracy: 0.9208 - lr: 0.0010
Epoch 59/100
635/638 [============================>.] - ETA: 0s - loss: 0.2545 - accuracy: 0.9277
Epoch 59: val_accuracy did not improve from 0.92389
638/638 [==============================] - 5s 8ms/step - loss: 0.2546 - accuracy: 0.9276 - val_loss: 0.2835 - val_accuracy: 0.9233 - lr: 0.0010
Epoch 60/100
636/638 [============================>.] - ETA: 0s - loss: 0.2554 - accuracy: 0.9274
Epoch 60: val_accuracy did not improve from 0.92389
638/638 [==============================] - 5s 8ms/step - loss: 0.2554 - accuracy: 0.9274 - val_loss: 0.2821 - val_accuracy: 0.9212 - lr: 0.0010
Epoch 61/100
636/638 [============================>.] - ETA: 0s - loss: 0.2569 - accuracy: 0.9278
Epoch 61: val_accuracy improved from 0.92389 to 0.92542, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.2567 - accuracy: 0.9279 - val_loss: 0.2683 - val_accuracy: 0.9254 - lr: 0.0010
Epoch 62/100
634/638 [============================>.] - ETA: 0s - loss: 0.2490 - accuracy: 0.9289
Epoch 62: val_accuracy did not improve from 0.92542
638/638 [==============================] - 5s 8ms/step - loss: 0.2495 - accuracy: 0.9286 - val_loss: 0.2788 - val_accuracy: 0.9203 - lr: 0.0010
Epoch 63/100
631/638 [============================>.] - ETA: 0s - loss: 0.2471 - accuracy: 0.9305
Epoch 63: val_accuracy did not improve from 0.92542
638/638 [==============================] - 5s 8ms/step - loss: 0.2475 - accuracy: 0.9305 - val_loss: 0.3199 - val_accuracy: 0.9107 - lr: 0.0010
Epoch 64/100
638/638 [==============================] - ETA: 0s - loss: 0.2538 - accuracy: 0.9293
Epoch 64: val_accuracy improved from 0.92542 to 0.92639, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.2538 - accuracy: 0.9293 - val_loss: 0.2771 - val_accuracy: 0.9264 - lr: 0.0010
Epoch 65/100
637/638 [============================>.] - ETA: 0s - loss: 0.2537 - accuracy: 0.9281
Epoch 65: val_accuracy did not improve from 0.92639
638/638 [==============================] - 5s 8ms/step - loss: 0.2536 - accuracy: 0.9281 - val_loss: 0.2719 - val_accuracy: 0.9264 - lr: 0.0010
Epoch 66/100
631/638 [============================>.] - ETA: 0s - loss: 0.2490 - accuracy: 0.9304
Epoch 66: val_accuracy did not improve from 0.92639
638/638 [==============================] - 5s 8ms/step - loss: 0.2492 - accuracy: 0.9303 - val_loss: 0.2777 - val_accuracy: 0.9229 - lr: 0.0010
Epoch 67/100
635/638 [============================>.] - ETA: 0s - loss: 0.2501 - accuracy: 0.9303
Epoch 67: val_accuracy did not improve from 0.92639
638/638 [==============================] - 5s 8ms/step - loss: 0.2504 - accuracy: 0.9303 - val_loss: 0.2937 - val_accuracy: 0.9225 - lr: 0.0010
Epoch 68/100
632/638 [============================>.] - ETA: 0s - loss: 0.2499 - accuracy: 0.9310
Epoch 68: val_accuracy did not improve from 0.92639
638/638 [==============================] - 5s 8ms/step - loss: 0.2497 - accuracy: 0.9311 - val_loss: 0.2846 - val_accuracy: 0.9257 - lr: 0.0010
Epoch 69/100
635/638 [============================>.] - ETA: 0s - loss: 0.2534 - accuracy: 0.9300
Epoch 69: val_accuracy did not improve from 0.92639
638/638 [==============================] - 5s 8ms/step - loss: 0.2533 - accuracy: 0.9300 - val_loss: 0.2800 - val_accuracy: 0.9249 - lr: 0.0010
Epoch 70/100
635/638 [============================>.] - ETA: 0s - loss: 0.2490 - accuracy: 0.9299
Epoch 70: val_accuracy did not improve from 0.92639
638/638 [==============================] - 5s 8ms/step - loss: 0.2494 - accuracy: 0.9299 - val_loss: 0.2820 - val_accuracy: 0.9175 - lr: 0.0010
Epoch 71/100
635/638 [============================>.] - ETA: 0s - loss: 0.2509 - accuracy: 0.9300
Epoch 71: val_accuracy did not improve from 0.92639
638/638 [==============================] - 5s 8ms/step - loss: 0.2508 - accuracy: 0.9300 - val_loss: 0.2995 - val_accuracy: 0.9140 - lr: 0.0010
Epoch 72/100
634/638 [============================>.] - ETA: 0s - loss: 0.2485 - accuracy: 0.9295
Epoch 72: val_accuracy did not improve from 0.92639
638/638 [==============================] - 5s 8ms/step - loss: 0.2486 - accuracy: 0.9294 - val_loss: 0.3053 - val_accuracy: 0.9143 - lr: 0.0010
Epoch 73/100
633/638 [============================>.] - ETA: 0s - loss: 0.2466 - accuracy: 0.9302
Epoch 73: val_accuracy did not improve from 0.92639
638/638 [==============================] - 5s 8ms/step - loss: 0.2466 - accuracy: 0.9301 - val_loss: 0.2834 - val_accuracy: 0.9247 - lr: 0.0010
Epoch 74/100
634/638 [============================>.] - ETA: 0s - loss: 0.2447 - accuracy: 0.9304
Epoch 74: val_accuracy did not improve from 0.92639
638/638 [==============================] - 5s 8ms/step - loss: 0.2445 - accuracy: 0.9305 - val_loss: 0.2814 - val_accuracy: 0.9236 - lr: 0.0010
Epoch 75/100
631/638 [============================>.] - ETA: 0s - loss: 0.2463 - accuracy: 0.9308
Epoch 75: val_accuracy did not improve from 0.92639
638/638 [==============================] - 5s 8ms/step - loss: 0.2464 - accuracy: 0.9307 - val_loss: 0.2843 - val_accuracy: 0.9193 - lr: 0.0010
Epoch 76/100
636/638 [============================>.] - ETA: 0s - loss: 0.2454 - accuracy: 0.9317
Epoch 76: val_accuracy did not improve from 0.92639
638/638 [==============================] - 5s 8ms/step - loss: 0.2455 - accuracy: 0.9316 - val_loss: 0.2942 - val_accuracy: 0.9212 - lr: 0.0010
Epoch 77/100
632/638 [============================>.] - ETA: 0s - loss: 0.2458 - accuracy: 0.9312
Epoch 77: val_accuracy did not improve from 0.92639
638/638 [==============================] - 5s 8ms/step - loss: 0.2456 - accuracy: 0.9313 - val_loss: 0.2772 - val_accuracy: 0.9246 - lr: 0.0010
Epoch 78/100
637/638 [============================>.] - ETA: 0s - loss: 0.2440 - accuracy: 0.9326
Epoch 78: val_accuracy did not improve from 0.92639
638/638 [==============================] - 5s 8ms/step - loss: 0.2440 - accuracy: 0.9326 - val_loss: 0.2832 - val_accuracy: 0.9221 - lr: 0.0010
Epoch 79/100
634/638 [============================>.] - ETA: 0s - loss: 0.2408 - accuracy: 0.9318
Epoch 79: val_accuracy improved from 0.92639 to 0.92681, saving model to best_model.h5
638/638 [==============================] - 5s 8ms/step - loss: 0.2408 - accuracy: 0.9318 - val_loss: 0.2733 - val_accuracy: 0.9268 - lr: 0.0010
Epoch 80/100
637/638 [============================>.] - ETA: 0s - loss: 0.2455 - accuracy: 0.9331
Epoch 80: val_accuracy did not improve from 0.92681
638/638 [==============================] - 5s 8ms/step - loss: 0.2455 - accuracy: 0.9331 - val_loss: 0.2819 - val_accuracy: 0.9231 - lr: 0.0010
Epoch 81/100
633/638 [============================>.] - ETA: 0s - loss: 0.2460 - accuracy: 0.9323
Epoch 81: val_accuracy did not improve from 0.92681
638/638 [==============================] - 5s 8ms/step - loss: 0.2460 - accuracy: 0.9323 - val_loss: 0.2897 - val_accuracy: 0.9199 - lr: 0.0010
Epoch 81: early stopping
Train: 0.969, Test: 0.930
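The training log above is driven by Keras callbacks: ModelCheckpoint writes best_model.h5 whenever val_accuracy improves, EarlyStopping ends the run at epoch 81, and ReduceLROnPlateau is what adds the lr field to each line (the learning rate stayed at 0.001 for the whole run, so it never actually fired). The cell that configures them appears earlier in the notebook; what follows is only a minimal sketch of a setup consistent with this output. The patience values are inferred from the log (val_loss last improved at epoch 61, and 61 + 20 = 81), and batch_size=64 is deduced from the 638 steps per epoch over 40,800 training images — treat all of these as assumptions, not the notebook's confirmed settings.

from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# save the weights with the best validation accuracy seen so far
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_accuracy',
                             save_best_only=True, verbose=1)
# stop once val_loss has not improved for 20 epochs (patience inferred from the log)
early_stop = EarlyStopping(monitor='val_loss', patience=20, verbose=1)
# ReduceLROnPlateau writes the 'lr' field into each epoch's log line;
# with this patience it never triggered here, matching the constant lr of 0.001
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=25, verbose=1)

history = final_model.fit(X_train, y_train,
                          validation_data=(X_val, y_val),
                          epochs=100, batch_size=64,
                          callbacks=[checkpoint, early_stop, reduce_lr])

The "Train: 0.969, Test: 0.930" line matches the common pattern of reloading the best checkpoint and scoring it on both splits, along these lines:

# reload the best checkpoint and report accuracy on both splits (sketch;
# assumes y_train / y_test are in the encoding the model was compiled for)
final_model = keras.models.load_model('best_model.h5')
_, train_acc = final_model.evaluate(X_train, y_train, verbose=0)
_, test_acc = final_model.evaluate(X_test, y_test, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))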
# predict class probabilities on the test set, then take the most probable class
pred = final_model.predict(X_test)
pred = np.argmax(pred, axis=1)
# y_test_label: integer class labels for the test set (defined earlier)
classification_matrix = confusion_matrix(y_test_label, pred)
# plot the confusion matrix as an annotated heatmap
plt.figure(figsize=(10, 10))
sns.heatmap(classification_matrix, annot=True, fmt="d")
plt.title("Confusion matrix")
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
375/375 [==============================] - 1s 2ms/step
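With the predictions already computed, scikit-learn's classification_report and accuracy_score (imported at the top of the notebook) give a per-class precision/recall/F1 breakdown that is often easier to scan than the raw heatmap and makes it obvious which classes the model confuses most. A short sketch using the label names from the table at the top of the notebook; it assumes pred and y_test_label from the cell above:

from sklearn.metrics import classification_report, accuracy_score

class_names = ['T-shirt/Top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot']

# overall accuracy plus per-class precision, recall and F1
print('Accuracy: %.3f' % accuracy_score(y_test_label, pred))
print(classification_report(y_test_label, pred, target_names=class_names))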